diff options
Diffstat (limited to 'tools')
444 files changed, 38612 insertions, 3868 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index caae4843cb70..16e006f708ca 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -91,6 +91,7 @@ struct kvm_regs { #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 #define KVM_VGIC_ITS_ADDR_TYPE 4 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 04b3256f8e6d..4e76630dd655 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -91,6 +91,7 @@ struct kvm_regs { #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 #define KVM_VGIC_ITS_ADDR_TYPE 4 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 833ed9a16adf..1b32b56a03d3 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -633,6 +633,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) +#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h index 389c36fd8299..ac5ba55066dd 100644 --- a/tools/arch/powerpc/include/uapi/asm/unistd.h +++ b/tools/arch/powerpc/include/uapi/asm/unistd.h @@ -398,5 +398,6 @@ #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 #define __NR_pkey_mprotect 386 +#define __NR_rseq 387 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 578793e97431..5701f5cecd31 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ @@ -207,13 +206,19 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ - +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -274,9 +279,12 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -334,6 +342,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) @@ -363,5 +372,6 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/bpf/bpf_exp.l b/tools/bpf/bpf_exp.l index bd83149e7be0..4da8d053d68f 100644 --- a/tools/bpf/bpf_exp.l +++ b/tools/bpf/bpf_exp.l @@ -175,7 +175,7 @@ extern void yyerror(const char *str); yylval.number = strtol(yytext, NULL, 10); return number; } -([0][0-9]+) { +([0][0-7]+) { yylval.number = strtol(yytext + 1, NULL, 8); return number; } diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore new file mode 100644 index 000000000000..d7e678c2d396 --- /dev/null +++ b/tools/bpf/bpftool/.gitignore @@ -0,0 +1,3 @@ +*.d +bpftool +FEATURE-DUMP.bpftool diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 0e4e923235b6..7b0e6d453e92 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -26,7 +26,9 @@ MAP COMMANDS | **bpftool** **cgroup help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** } +| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | +| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | +| **sendmsg4** | **sendmsg6** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -63,7 +65,17 @@ DESCRIPTION **egress** egress path of the inet socket (since 4.10); **sock_create** opening of an inet socket (since 4.10); **sock_ops** various socket operations (since 4.12); - **device** device access (since 4.15). + **device** device access (since 4.15); + **bind4** call to bind(2) for an inet4 socket (since 4.17); + **bind6** call to bind(2) for an inet6 socket (since 4.17); + **post_bind4** return from bind(2) for an inet4 socket (since 4.17); + **post_bind6** return from bind(2) for an inet6 socket (since 4.17); + **connect4** call to connect(2) for an inet4 socket (since 4.17); + **connect6** call to connect(2) for an inet6 socket (since 4.17); + **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an + unconnected udp4 socket (since 4.18); + **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an + unconnected udp6 socket (since 4.18). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* Detach *PROG* from the cgroup *CGROUP* and attach type diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 457e868bd32f..a6258bc8ec4f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -22,17 +22,19 @@ MAP COMMANDS ============= | **bpftool** **map { show | list }** [*MAP*] -| **bpftool** **map dump** *MAP* -| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] -| **bpftool** **map lookup** *MAP* **key** *BYTES* -| **bpftool** **map getnext** *MAP* [**key** *BYTES*] -| **bpftool** **map delete** *MAP* **key** *BYTES* -| **bpftool** **map pin** *MAP* *FILE* +| **bpftool** **map dump** *MAP* +| **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] +| **bpftool** **map lookup** *MAP* **key** *DATA* +| **bpftool** **map getnext** *MAP* [**key** *DATA*] +| **bpftool** **map delete** *MAP* **key** *DATA* +| **bpftool** **map pin** *MAP* *FILE* +| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] | **bpftool** **map help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *DATA* := { [**hex**] *BYTES* } | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *VALUE* := { *BYTES* | *MAP* | *PROG* } +| *VALUE* := { *DATA* | *MAP* | *PROG* } | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } DESCRIPTION @@ -48,20 +50,26 @@ DESCRIPTION **bpftool map dump** *MAP* Dump all entries in a given *MAP*. - **bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] + **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] Update map entry for a given *KEY*. *UPDATE_FLAGS* can be one of: **any** update existing entry or add if doesn't exit; **exist** update only if entry already exists; **noexist** update only if entry doesn't exist. - **bpftool map lookup** *MAP* **key** *BYTES* + If the **hex** keyword is provided in front of the bytes + sequence, the bytes are parsed as hexadeximal values, even if + no "0x" prefix is added. If the keyword is not provided, then + the bytes are parsed as decimal values, unless a "0x" prefix + (for hexadecimal) or a "0" prefix (for octal) is provided. + + **bpftool map lookup** *MAP* **key** *DATA* Lookup **key** in the map. - **bpftool map getnext** *MAP* [**key** *BYTES*] + **bpftool map getnext** *MAP* [**key** *DATA*] Get next key. If *key* is not specified, get first key. - **bpftool map delete** *MAP* **key** *BYTES* + **bpftool map delete** *MAP* **key** *DATA* Remove entry from the map. **bpftool map pin** *MAP* *FILE* @@ -69,6 +77,22 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. + **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] + Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. + + Install perf rings into a perf event array map and dump + output of any bpf_perf_event_output() call in the kernel. + By default read the number of CPUs on the system and + install perf ring for each CPU in the corresponding index + in the array. + + If **cpu** and **index** are specified, install perf ring + for given **cpu** at **index** in the array (single ring). + + Note that installing a perf ring into an array will silently + replace any existing ring. Any other application will stop + receiving events if it installed its rings earlier. + **bpftool map help** Print short help message. @@ -98,7 +122,12 @@ EXAMPLES 10: hash name some_map flags 0x0 key 4B value 8B max_entries 2048 memlock 167936B -**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04** +The following three commands are equivalent: + +| +| **# bpftool map update id 10 key hex 20 c4 b7 00 value hex 0f ff ff ab 01 02 03 4c** +| **# bpftool map update id 10 key 0x20 0xc4 0xb7 0x00 value 0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c** +| **# bpftool map update id 10 key 32 196 183 0 value 15 255 255 171 1 2 3 76** **# bpftool map lookup id 10 key 0 1 2 3** diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst new file mode 100644 index 000000000000..e3eb0eab7641 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -0,0 +1,81 @@ +================ +bpftool-perf +================ +------------------------------------------------------------------------------- +tool for inspection of perf related bpf prog attachments +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **perf** *COMMAND* + + *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **list** | **help** } + +PERF COMMANDS +============= + +| **bpftool** **perf { show | list }** +| **bpftool** **perf help** + +DESCRIPTION +=========== + **bpftool perf { show | list }** + List all raw_tracepoint, tracepoint, kprobe attachment in the system. + + Output will start with process id and file descriptor in that process, + followed by bpf program id, attachment information, and attachment point. + The attachment point for raw_tracepoint/tracepoint is the trace probe name. + The attachment point for k[ret]probe is either symbol name and offset, + or a kernel virtual address. + The attachment point for u[ret]probe is the file name and the file offset. + + **bpftool perf help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +EXAMPLES +======== + +| **# bpftool perf** + +:: + + pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0 + pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0 + pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep + pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159 + +| +| **# bpftool -j perf** + +:: + + [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \ + {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \ + {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \ + {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}] + + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 67ca6c69376c..43d34a5c3ec5 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -95,7 +95,7 @@ EXAMPLES **# bpftool prog show** :: - 10: xdp name some_prog tag 005a3d2123620c8b + 10: xdp name some_prog tag 005a3d2123620c8b gpl loaded_at Sep 29/20:11 uid 0 xlated 528B jited 370B memlock 4096B map_ids 10 @@ -108,6 +108,7 @@ EXAMPLES "id": 10, "type": "xdp", "tag": "005a3d2123620c8b", + "gpl_compatible": true, "loaded_at": "Sep 29/20:11", "uid": 0, "bytes_xlated": 528, diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 20689a321ffe..b6f5d560460d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,20 +16,22 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **cgroup** } + *OBJECT* := { **map** | **program** | **cgroup** | **perf** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } *MAP-COMMANDS* := { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** - | **pin** | **help** } + | **pin** | **event_pipe** | **help** } *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | **load** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } + *PERF-COMMANDS* := { **show** | **list** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects @@ -56,3 +58,4 @@ OPTIONS SEE ALSO ======== **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4e69782c4a79..892dbf095bff 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -39,7 +39,12 @@ CC = gcc CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers -CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ +CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ + -I$(srctree)/kernel/bpf/ \ + -I$(srctree)/tools/include \ + -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/lib/bpf \ + -I$(srctree)/tools/perf CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' LIBS = -lelf -lbfd -lopcodes $(LIBBPF) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 490811b45fa7..1e1083321643 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -1,6 +1,6 @@ # bpftool(8) bash completion -*- shell-script -*- # -# Copyright (C) 2017 Netronome Systems, Inc. +# Copyright (C) 2017-2018 Netronome Systems, Inc. # # This software is dual licensed under the GNU General License # Version 2, June 1991 as shown in the file COPYING in the top-level @@ -79,6 +79,14 @@ _bpftool_get_map_ids() command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) } +_bpftool_get_perf_map_ids() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ + command grep -C2 perf_event_array | \ + command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) +} + + _bpftool_get_prog_ids() { COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ @@ -147,7 +155,7 @@ _bpftool() # Deal with simplest keywords case $prev in - help|key|opcodes|visual) + help|hex|opcodes|visual) return 0 ;; tag) @@ -283,7 +291,7 @@ _bpftool() return 0 ;; key) - return 0 + COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; *) _bpftool_once_attr 'key' @@ -302,7 +310,7 @@ _bpftool() return 0 ;; key) - return 0 + COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; value) # We can have bytes, or references to a prog or a @@ -321,6 +329,8 @@ _bpftool() return 0 ;; *) + COMPREPLY+=( $( compgen -W 'hex' \ + -- "$cur" ) ) return 0 ;; esac @@ -357,10 +367,34 @@ _bpftool() fi return 0 ;; + event_pipe) + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + id) + _bpftool_get_perf_map_ids + return 0 + ;; + cpu) + return 0 + ;; + index) + return 0 + ;; + *) + _bpftool_once_attr 'cpu' + _bpftool_once_attr 'index' + return 0 + ;; + esac + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ - lookup pin show list update' -- "$cur" ) ) + lookup pin event_pipe show list update' -- \ + "$cur" ) ) ;; esac ;; @@ -372,7 +406,8 @@ _bpftool() ;; attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ - device' + device bind4 bind6 post_bind4 post_bind6 connect4 \ + connect6 sendmsg4 sendmsg6' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag' case $prev in @@ -380,7 +415,9 @@ _bpftool() _filedir return 0 ;; - ingress|egress|sock_create|sock_ops|device) + ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ + post_bind4|post_bind6|connect4|connect6|sendmsg4|\ + sendmsg6) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 @@ -412,6 +449,15 @@ _bpftool() ;; esac ;; + perf) + case $command in + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help \ + show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index cae32a61cb18..16bee011e16c 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -16,8 +16,11 @@ #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" -#define HELP_SPEC_ATTACH_TYPES \ - "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }" +#define HELP_SPEC_ATTACH_TYPES \ + " ATTACH_TYPE := { ingress | egress | sock_create |\n" \ + " sock_ops | device | bind4 | bind6 |\n" \ + " post_bind4 | post_bind6 | connect4 |\n" \ + " connect6 | sendmsg4 | sendmsg6 }" static const char * const attach_type_strings[] = { [BPF_CGROUP_INET_INGRESS] = "ingress", @@ -25,6 +28,14 @@ static const char * const attach_type_strings[] = { [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", [BPF_CGROUP_SOCK_OPS] = "sock_ops", [BPF_CGROUP_DEVICE] = "device", + [BPF_CGROUP_INET4_BIND] = "bind4", + [BPF_CGROUP_INET6_BIND] = "bind6", + [BPF_CGROUP_INET4_CONNECT] = "connect4", + [BPF_CGROUP_INET6_CONNECT] = "connect6", + [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", + [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", + [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", + [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -282,7 +293,7 @@ static int do_help(int argc, char **argv) " %s %s detach CGROUP ATTACH_TYPE PROG\n" " %s %s help\n" "\n" - " " HELP_SPEC_ATTACH_TYPES "\n" + HELP_SPEC_ATTACH_TYPES "\n" " " HELP_SPEC_ATTACH_FLAGS "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 465995281dcd..32f9e397a6c0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 Netronome Systems, Inc. + * Copyright (C) 2017-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -33,6 +33,7 @@ /* Author: Jakub Kicinski <kubakici@wp.pl> */ +#include <ctype.h> #include <errno.h> #include <fcntl.h> #include <fts.h> @@ -330,6 +331,16 @@ char *get_fdinfo(int fd, const char *key) return NULL; } +void print_data_json(uint8_t *data, size_t len) +{ + unsigned int i; + + jsonw_start_array(json_wtr); + for (i = 0; i < len; i++) + jsonw_printf(json_wtr, "%d", data[i]); + jsonw_end_array(json_wtr); +} + void print_hex_data_json(uint8_t *data, size_t len) { unsigned int i; @@ -420,6 +431,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab) } } +unsigned int get_page_size(void) +{ + static int result; + + if (!result) + result = getpagesize(); + return result; +} + +unsigned int get_possible_cpus(void) +{ + static unsigned int result; + char buf[128]; + long int n; + char *ptr; + int fd; + + if (result) + return result; + + fd = open("/sys/devices/system/cpu/possible", O_RDONLY); + if (fd < 0) { + p_err("can't open sysfs possible cpus"); + exit(-1); + } + + n = read(fd, buf, sizeof(buf)); + if (n < 2) { + p_err("can't read sysfs possible cpus"); + exit(-1); + } + close(fd); + + if (n == sizeof(buf)) { + p_err("read sysfs possible cpus overflow"); + exit(-1); + } + + ptr = buf; + n = 0; + while (*ptr && *ptr != '\n') { + unsigned int a, b; + + if (sscanf(ptr, "%u-%u", &a, &b) == 2) { + n += b - a + 1; + + ptr = strchr(ptr, '-') + 1; + } else if (sscanf(ptr, "%u", &a) == 1) { + n++; + } else { + assert(0); + } + + while (isdigit(*ptr)) + ptr++; + if (*ptr == ',') + ptr++; + } + + result = n; + + return result; +} + static char * ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) { diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 1ec852d21d44..eea7f14355f3 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -87,7 +87,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup }\n" + " OBJECT := { prog | map | cgroup | perf }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -216,6 +216,7 @@ static const struct cmd cmds[] = { { "prog", do_prog }, { "map", do_map }, { "cgroup", do_cgroup }, + { "perf", do_perf }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index b8e9584d6246..63fdb310b9a4 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 Netronome Systems, Inc. + * Copyright (C) 2017-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -117,14 +117,20 @@ int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); int do_map(int argc, char **arg); +int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); +int do_perf(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const char *arch); +void print_data_json(uint8_t *data, size_t len); void print_hex_data_json(uint8_t *data, size_t len); +unsigned int get_page_size(void); +unsigned int get_possible_cpus(void); const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index f509c86faede..097b1a5e046b 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 Netronome Systems, Inc. + * Copyright (C) 2017-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -34,7 +34,6 @@ /* Author: Jakub Kicinski <kubakici@wp.pl> */ #include <assert.h> -#include <ctype.h> #include <errno.h> #include <fcntl.h> #include <stdbool.h> @@ -67,63 +66,9 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_DEVMAP] = "devmap", [BPF_MAP_TYPE_SOCKMAP] = "sockmap", [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", }; -static unsigned int get_possible_cpus(void) -{ - static unsigned int result; - char buf[128]; - long int n; - char *ptr; - int fd; - - if (result) - return result; - - fd = open("/sys/devices/system/cpu/possible", O_RDONLY); - if (fd < 0) { - p_err("can't open sysfs possible cpus"); - exit(-1); - } - - n = read(fd, buf, sizeof(buf)); - if (n < 2) { - p_err("can't read sysfs possible cpus"); - exit(-1); - } - close(fd); - - if (n == sizeof(buf)) { - p_err("read sysfs possible cpus overflow"); - exit(-1); - } - - ptr = buf; - n = 0; - while (*ptr && *ptr != '\n') { - unsigned int a, b; - - if (sscanf(ptr, "%u-%u", &a, &b) == 2) { - n += b - a + 1; - - ptr = strchr(ptr, '-') + 1; - } else if (sscanf(ptr, "%u", &a) == 1) { - n++; - } else { - assert(0); - } - - while (isdigit(*ptr)) - ptr++; - if (*ptr == ',') - ptr++; - } - - result = n; - - return result; -} - static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@ -186,8 +131,7 @@ static int map_parse_fd(int *argc, char ***argv) return -1; } -static int -map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) { int err; int fd; @@ -283,11 +227,16 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, static char **parse_bytes(char **argv, const char *name, unsigned char *val, unsigned int n) { - unsigned int i = 0; + unsigned int i = 0, base = 0; char *endptr; + if (is_prefix(*argv, "hex")) { + base = 16; + argv++; + } + while (i < n && argv[i]) { - val[i] = strtoul(argv[i], &endptr, 0); + val[i] = strtoul(argv[i], &endptr, base); if (*endptr) { p_err("error parsing byte: %s", argv[i]); return NULL; @@ -868,23 +817,25 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [MAP]\n" - " %s %s dump MAP\n" - " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" - " %s %s lookup MAP key BYTES\n" - " %s %s getnext MAP [key BYTES]\n" - " %s %s delete MAP key BYTES\n" - " %s %s pin MAP FILE\n" + " %s %s dump MAP\n" + " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n" + " %s %s lookup MAP key DATA\n" + " %s %s getnext MAP [key DATA]\n" + " %s %s delete MAP key DATA\n" + " %s %s pin MAP FILE\n" + " %s %s event_pipe MAP [cpu N index M]\n" " %s %s help\n" "\n" " MAP := { id MAP_ID | pinned FILE }\n" + " DATA := { [hex] BYTES }\n" " " HELP_SPEC_PROGRAM "\n" - " VALUE := { BYTES | MAP | PROG }\n" + " VALUE := { DATA | MAP | PROG }\n" " UPDATE_FLAGS := { any | exist | noexist }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -899,6 +850,7 @@ static const struct cmd cmds[] = { { "getnext", do_getnext }, { "delete", do_delete }, { "pin", do_pin }, + { "event_pipe", do_event_pipe }, { 0 } }; diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c new file mode 100644 index 000000000000..1832100d1b27 --- /dev/null +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2018 Netronome Systems, Inc. */ +/* This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <errno.h> +#include <fcntl.h> +#include <libbpf.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include <bpf.h> +#include <perf-sys.h> + +#include "main.h" + +#define MMAP_PAGE_CNT 16 + +static bool stop; + +struct event_ring_info { + int fd; + int key; + unsigned int cpu; + void *mem; +}; + +struct perf_event_sample { + struct perf_event_header header; + u64 time; + __u32 size; + unsigned char data[]; +}; + +static void int_exit(int signo) +{ + fprintf(stderr, "Stopping...\n"); + stop = true; +} + +static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) +{ + struct event_ring_info *ring = priv; + struct perf_event_sample *e = event; + struct { + struct perf_event_header header; + __u64 id; + __u64 lost; + } *lost = event; + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "type"); + jsonw_uint(json_wtr, e->header.type); + jsonw_name(json_wtr, "cpu"); + jsonw_uint(json_wtr, ring->cpu); + jsonw_name(json_wtr, "index"); + jsonw_uint(json_wtr, ring->key); + if (e->header.type == PERF_RECORD_SAMPLE) { + jsonw_name(json_wtr, "timestamp"); + jsonw_uint(json_wtr, e->time); + jsonw_name(json_wtr, "data"); + print_data_json(e->data, e->size); + } else if (e->header.type == PERF_RECORD_LOST) { + jsonw_name(json_wtr, "lost"); + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "id"); + jsonw_uint(json_wtr, lost->id); + jsonw_name(json_wtr, "count"); + jsonw_uint(json_wtr, lost->lost); + jsonw_end_object(json_wtr); + } + jsonw_end_object(json_wtr); + } else { + if (e->header.type == PERF_RECORD_SAMPLE) { + printf("== @%lld.%09lld CPU: %d index: %d =====\n", + e->time / 1000000000ULL, e->time % 1000000000ULL, + ring->cpu, ring->key); + fprint_hex(stdout, e->data, e->size, " "); + printf("\n"); + } else if (e->header.type == PERF_RECORD_LOST) { + printf("lost %lld events\n", lost->lost); + } else { + printf("unknown event type=%d size=%d\n", + e->header.type, e->header.size); + } + } + + return LIBBPF_PERF_EVENT_CONT; +} + +static void +perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len) +{ + enum bpf_perf_event_ret ret; + + ret = bpf_perf_event_read_simple(ring->mem, + MMAP_PAGE_CNT * get_page_size(), + get_page_size(), buf, buf_len, + print_bpf_output, ring); + if (ret != LIBBPF_PERF_EVENT_CONT) { + fprintf(stderr, "perf read loop failed with %d\n", ret); + stop = true; + } +} + +static int perf_mmap_size(void) +{ + return get_page_size() * (MMAP_PAGE_CNT + 1); +} + +static void *perf_event_mmap(int fd) +{ + int mmap_size = perf_mmap_size(); + void *base; + + base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (base == MAP_FAILED) { + p_err("event mmap failed: %s\n", strerror(errno)); + return NULL; + } + + return base; +} + +static void perf_event_unmap(void *mem) +{ + if (munmap(mem, perf_mmap_size())) + fprintf(stderr, "Can't unmap ring memory!\n"); +} + +static int bpf_perf_event_open(int map_fd, int key, int cpu) +{ + struct perf_event_attr attr = { + .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + }; + int pmu_fd; + + pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (pmu_fd < 0) { + p_err("failed to open perf event %d for CPU %d", key, cpu); + return -1; + } + + if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) { + p_err("failed to update map for event %d for CPU %d", key, cpu); + goto err_close; + } + if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + p_err("failed to enable event %d for CPU %d", key, cpu); + goto err_close; + } + + return pmu_fd; + +err_close: + close(pmu_fd); + return -1; +} + +int do_event_pipe(int argc, char **argv) +{ + int i, nfds, map_fd, index = -1, cpu = -1; + struct bpf_map_info map_info = {}; + struct event_ring_info *rings; + size_t tmp_buf_sz = 0; + void *tmp_buf = NULL; + struct pollfd *pfds; + __u32 map_info_len; + bool do_all = true; + + map_info_len = sizeof(map_info); + map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len); + if (map_fd < 0) + return -1; + + if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + p_err("map is not a perf event array"); + goto err_close_map; + } + + while (argc) { + if (argc < 2) + BAD_ARG(); + + if (is_prefix(*argv, "cpu")) { + char *endptr; + + NEXT_ARG(); + cpu = strtoul(*argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as CPU ID", **argv); + goto err_close_map; + } + + NEXT_ARG(); + } else if (is_prefix(*argv, "index")) { + char *endptr; + + NEXT_ARG(); + index = strtoul(*argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as index", **argv); + goto err_close_map; + } + + NEXT_ARG(); + } else { + BAD_ARG(); + } + + do_all = false; + } + + if (!do_all) { + if (index == -1 || cpu == -1) { + p_err("cpu and index must be specified together"); + goto err_close_map; + } + + nfds = 1; + } else { + nfds = min(get_possible_cpus(), map_info.max_entries); + cpu = 0; + index = 0; + } + + rings = calloc(nfds, sizeof(rings[0])); + if (!rings) + goto err_close_map; + + pfds = calloc(nfds, sizeof(pfds[0])); + if (!pfds) + goto err_free_rings; + + for (i = 0; i < nfds; i++) { + rings[i].cpu = cpu + i; + rings[i].key = index + i; + + rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key, + rings[i].cpu); + if (rings[i].fd < 0) + goto err_close_fds_prev; + + rings[i].mem = perf_event_mmap(rings[i].fd); + if (!rings[i].mem) + goto err_close_fds_current; + + pfds[i].fd = rings[i].fd; + pfds[i].events = POLLIN; + } + + signal(SIGINT, int_exit); + signal(SIGHUP, int_exit); + signal(SIGTERM, int_exit); + + if (json_output) + jsonw_start_array(json_wtr); + + while (!stop) { + poll(pfds, nfds, 200); + for (i = 0; i < nfds; i++) + perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz); + } + free(tmp_buf); + + if (json_output) + jsonw_end_array(json_wtr); + + for (i = 0; i < nfds; i++) { + perf_event_unmap(rings[i].mem); + close(rings[i].fd); + } + free(pfds); + free(rings); + close(map_fd); + + return 0; + +err_close_fds_prev: + while (i--) { + perf_event_unmap(rings[i].mem); +err_close_fds_current: + close(rings[i].fd); + } + free(pfds); +err_free_rings: + free(rings); +err_close_map: + close(map_fd); + return -1; +} diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c new file mode 100644 index 000000000000..b76b77dcfd1f --- /dev/null +++ b/tools/bpf/bpftool/perf.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook +// Author: Yonghong Song <yhs@fb.com> + +#define _GNU_SOURCE +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <ftw.h> + +#include <bpf.h> + +#include "main.h" + +/* 0: undecided, 1: supported, 2: not supported */ +static int perf_query_supported; +static bool has_perf_query_support(void) +{ + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + char buf[256]; + int fd; + + if (perf_query_supported) + goto out; + + fd = open("/", O_RDONLY); + if (fd < 0) { + p_err("perf_query_support: cannot open directory \"/\" (%s)", + strerror(errno)); + goto out; + } + + /* the following query will fail as no bpf attachment, + * the expected errno is ENOTSUPP + */ + errno = 0; + len = sizeof(buf); + bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + + if (errno == 524 /* ENOTSUPP */) { + perf_query_supported = 1; + goto close_fd; + } + + perf_query_supported = 2; + p_err("perf_query_support: %s", strerror(errno)); + fprintf(stderr, + "HINT: non root or kernel doesn't support TASK_FD_QUERY\n"); + +close_fd: + close(fd); +out: + return perf_query_supported == 1; +} + +static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type, + char *buf, __u64 probe_offset, __u64 probe_addr) +{ + jsonw_start_object(json_wtr); + jsonw_int_field(json_wtr, "pid", pid); + jsonw_int_field(json_wtr, "fd", fd); + jsonw_uint_field(json_wtr, "prog_id", prog_id); + switch (fd_type) { + case BPF_FD_TYPE_RAW_TRACEPOINT: + jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint"); + jsonw_string_field(json_wtr, "tracepoint", buf); + break; + case BPF_FD_TYPE_TRACEPOINT: + jsonw_string_field(json_wtr, "fd_type", "tracepoint"); + jsonw_string_field(json_wtr, "tracepoint", buf); + break; + case BPF_FD_TYPE_KPROBE: + jsonw_string_field(json_wtr, "fd_type", "kprobe"); + if (buf[0] != '\0') { + jsonw_string_field(json_wtr, "func", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + } else { + jsonw_lluint_field(json_wtr, "addr", probe_addr); + } + break; + case BPF_FD_TYPE_KRETPROBE: + jsonw_string_field(json_wtr, "fd_type", "kretprobe"); + if (buf[0] != '\0') { + jsonw_string_field(json_wtr, "func", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + } else { + jsonw_lluint_field(json_wtr, "addr", probe_addr); + } + break; + case BPF_FD_TYPE_UPROBE: + jsonw_string_field(json_wtr, "fd_type", "uprobe"); + jsonw_string_field(json_wtr, "filename", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + break; + case BPF_FD_TYPE_URETPROBE: + jsonw_string_field(json_wtr, "fd_type", "uretprobe"); + jsonw_string_field(json_wtr, "filename", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + break; + } + jsonw_end_object(json_wtr); +} + +static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type, + char *buf, __u64 probe_offset, __u64 probe_addr) +{ + printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id); + switch (fd_type) { + case BPF_FD_TYPE_RAW_TRACEPOINT: + printf("raw_tracepoint %s\n", buf); + break; + case BPF_FD_TYPE_TRACEPOINT: + printf("tracepoint %s\n", buf); + break; + case BPF_FD_TYPE_KPROBE: + if (buf[0] != '\0') + printf("kprobe func %s offset %llu\n", buf, + probe_offset); + else + printf("kprobe addr %llu\n", probe_addr); + break; + case BPF_FD_TYPE_KRETPROBE: + if (buf[0] != '\0') + printf("kretprobe func %s offset %llu\n", buf, + probe_offset); + else + printf("kretprobe addr %llu\n", probe_addr); + break; + case BPF_FD_TYPE_UPROBE: + printf("uprobe filename %s offset %llu\n", buf, probe_offset); + break; + case BPF_FD_TYPE_URETPROBE: + printf("uretprobe filename %s offset %llu\n", buf, + probe_offset); + break; + } +} + +static int show_proc(const char *fpath, const struct stat *sb, + int tflag, struct FTW *ftwbuf) +{ + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + int err, pid = 0, fd = 0; + const char *pch; + char buf[4096]; + + /* prefix always /proc */ + pch = fpath + 5; + if (*pch == '\0') + return 0; + + /* pid should be all numbers */ + pch++; + while (isdigit(*pch)) { + pid = pid * 10 + *pch - '0'; + pch++; + } + if (*pch == '\0') + return 0; + if (*pch != '/') + return FTW_SKIP_SUBTREE; + + /* check /proc/<pid>/fd directory */ + pch++; + if (strncmp(pch, "fd", 2)) + return FTW_SKIP_SUBTREE; + pch += 2; + if (*pch == '\0') + return 0; + if (*pch != '/') + return FTW_SKIP_SUBTREE; + + /* check /proc/<pid>/fd/<fd_num> */ + pch++; + while (isdigit(*pch)) { + fd = fd * 10 + *pch - '0'; + pch++; + } + if (*pch != '\0') + return FTW_SKIP_SUBTREE; + + /* query (pid, fd) for potential perf events */ + len = sizeof(buf); + err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type, + &probe_offset, &probe_addr); + if (err < 0) + return 0; + + if (json_output) + print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset, + probe_addr); + else + print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset, + probe_addr); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + int flags = FTW_ACTIONRETVAL | FTW_PHYS; + int err = 0, nopenfd = 16; + + if (!has_perf_query_support()) + return -1; + + if (json_output) + jsonw_start_array(json_wtr); + if (nftw("/proc", show_proc, nopenfd, flags) == -1) { + p_err("%s", strerror(errno)); + err = -1; + } + if (json_output) + jsonw_end_array(json_wtr); + + return err; +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s %s { show | list | help }\n" + "", + bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "help", do_help }, + { 0 } +}; + +int do_perf(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f7a810897eac..959aa53ab678 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -68,6 +68,10 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", [BPF_PROG_TYPE_SK_SKB] = "sk_skb", [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", }; static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) @@ -86,14 +90,19 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) } wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + - nsecs / 1000000000; + (real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) / + 1000000000; + if (!localtime_r(&wallclock_secs, &load_tm)) { snprintf(buf, size, "%llu", nsecs / 1000000000); return; } - strftime(buf, size, "%b %d/%H:%M", &load_tm); + if (json_output) + strftime(buf, size, "%s", &load_tm); + else + strftime(buf, size, "%FT%T%z", &load_tm); } static int prog_fd_by_tag(unsigned char *tag) @@ -232,6 +241,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible); + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); if (info->load_time) { @@ -240,7 +251,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) print_boot_time(info->load_time, buf, sizeof(buf)); /* Piggy back on load_time, since 0 uid is a valid one */ - jsonw_string_field(json_wtr, "loaded_at", buf); + jsonw_name(json_wtr, "loaded_at"); + jsonw_printf(json_wtr, "%s", buf); jsonw_uint_field(json_wtr, "uid", info->created_by_uid); } @@ -292,6 +304,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); + printf("%s", info->gpl_compatible ? " gpl" : ""); printf("\n"); if (info->load_time) { @@ -410,7 +423,11 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { + unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; + unsigned int *func_lens = NULL; + unsigned int nr_func_ksyms; + unsigned int nr_func_lens; struct dump_data dd = {}; __u32 len = sizeof(info); unsigned int buf_size; @@ -486,10 +503,34 @@ static int do_dump(int argc, char **argv) return -1; } + nr_func_ksyms = info.nr_jited_ksyms; + if (nr_func_ksyms) { + func_ksyms = malloc(nr_func_ksyms * sizeof(__u64)); + if (!func_ksyms) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + + nr_func_lens = info.nr_jited_func_lens; + if (nr_func_lens) { + func_lens = malloc(nr_func_lens * sizeof(__u32)); + if (!func_lens) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + memset(&info, 0, sizeof(info)); *member_ptr = ptr_to_u64(buf); *member_len = buf_size; + info.jited_ksyms = ptr_to_u64(func_ksyms); + info.nr_jited_ksyms = nr_func_ksyms; + info.jited_func_lens = ptr_to_u64(func_lens); + info.nr_jited_func_lens = nr_func_lens; err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); @@ -503,6 +544,16 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.nr_jited_ksyms > nr_func_ksyms) { + p_err("too many addresses returned"); + goto err_free; + } + + if (info.nr_jited_func_lens > nr_func_lens) { + p_err("too many values returned"); + goto err_free; + } + if ((member_len == &info.jited_prog_len && info.jited_prog_insns == 0) || (member_len == &info.xlated_prog_len && @@ -540,7 +591,57 @@ static int do_dump(int argc, char **argv) goto err_free; } - disasm_print_insn(buf, *member_len, opcodes, name); + if (info.nr_jited_func_lens && info.jited_func_lens) { + struct kernel_sym *sym = NULL; + char sym_name[SYM_MAX_NAME]; + unsigned char *img = buf; + __u64 *ksyms = NULL; + __u32 *lens; + __u32 i; + + if (info.nr_jited_ksyms) { + kernel_syms_load(&dd); + ksyms = (__u64 *) info.jited_ksyms; + } + + if (json_output) + jsonw_start_array(json_wtr); + + lens = (__u32 *) info.jited_func_lens; + for (i = 0; i < info.nr_jited_func_lens; i++) { + if (ksyms) { + sym = kernel_syms_search(&dd, ksyms[i]); + if (sym) + sprintf(sym_name, "%s", sym->name); + else + sprintf(sym_name, "0x%016llx", ksyms[i]); + } else { + strcpy(sym_name, "unknown"); + } + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "name"); + jsonw_string(json_wtr, sym_name); + jsonw_name(json_wtr, "insns"); + } else { + printf("%s:\n", sym_name); + } + + disasm_print_insn(img, lens[i], opcodes, name); + img += lens[i]; + + if (json_output) + jsonw_end_object(json_wtr); + else + printf("\n"); + } + + if (json_output) + jsonw_end_array(json_wtr); + } else { + disasm_print_insn(buf, *member_len, opcodes, name); + } } else if (visual) { if (json_output) jsonw_null(json_wtr); @@ -548,6 +649,9 @@ static int do_dump(int argc, char **argv) dump_xlated_cfg(buf, *member_len); } else { kernel_syms_load(&dd); + dd.nr_jited_ksyms = info.nr_jited_ksyms; + dd.jited_ksyms = (__u64 *) info.jited_ksyms; + if (json_output) dump_xlated_json(&dd, buf, *member_len, opcodes); else @@ -556,10 +660,14 @@ static int do_dump(int argc, char **argv) } free(buf); + free(func_ksyms); + free(func_lens); return 0; err_free: free(buf); + free(func_ksyms); + free(func_lens); return -1; } @@ -586,15 +694,19 @@ static int do_load(int argc, char **argv) return -1; } - if (do_pin_fd(prog_fd, argv[1])) { - p_err("failed to pin program"); - return -1; - } + if (do_pin_fd(prog_fd, argv[1])) + goto err_close_obj; if (json_output) jsonw_null(json_wtr); + bpf_object__close(obj); + return 0; + +err_close_obj: + bpf_object__close(obj); + return -1; } static int do_help(int argc, char **argv) diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 7a3173b76c16..b97f1da60dd1 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd) free(dd->sym_mapping); } -static struct kernel_sym *kernel_syms_search(struct dump_data *dd, - unsigned long key) +struct kernel_sym *kernel_syms_search(struct dump_data *dd, + unsigned long key) { struct kernel_sym sym = { .address = key, @@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd, unsigned long address, const struct bpf_insn *insn) { - if (sym) + if (!dd->nr_jited_ksyms) + /* Do not show address for interpreted programs */ + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d", insn->off); + else if (sym) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "%+d#%s", insn->off, sym->name); else @@ -203,6 +207,10 @@ static const char *print_call(void *private_data, unsigned long address = dd->address_call_base + insn->imm; struct kernel_sym *sym; + if (insn->src_reg == BPF_PSEUDO_CALL && + (__u32) insn->imm < dd->nr_jited_ksyms) + address = dd->jited_ksyms[insn->imm]; + sym = kernel_syms_search(dd, address); if (insn->src_reg == BPF_PSEUDO_CALL) return print_call_pcrel(dd, sym, address, insn); diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index b34affa7ef2d..33d86e2b369b 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -49,11 +49,14 @@ struct dump_data { unsigned long address_call_base; struct kernel_sym *sym_mapping; __u32 sym_count; + __u64 *jited_ksyms; + __u32 nr_jited_ksyms; char scratch_buff[SYM_MAX_NAME + 8]; }; void kernel_syms_load(struct dump_data *dd); void kernel_syms_destroy(struct dump_data *dd); +struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key); void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, bool opcodes); void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, diff --git a/tools/build/Build.include b/tools/build/Build.include index a4bbb984941d..950c1504ca37 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -63,8 +63,8 @@ dep-cmd = $(if $(wildcard $(fixdep)), $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \ rm -f $(depfile); \ mv -f $(dot-target).tmp $(dot-target).cmd, \ - printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ - printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ + printf '$(pound) cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ + printf '$(pound) using basic dep data\n\n' >> $(dot-target).cmd; \ cat $(depfile) >> $(dot-target).cmd; \ printf '\n%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) @@ -98,4 +98,4 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX ### ## HOSTCC C flags -host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CHOSTFLAGS) -D"BUILD_STR(s)=\#s" $(CHOSTFLAGS_$(basetarget).o) $(CHOSTFLAGS_$(obj)) +host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) diff --git a/tools/build/Makefile b/tools/build/Makefile index 5eb4b5ad79cb..5edf65e684ab 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -43,7 +43,7 @@ $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep $(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o - $(QUIET_LINK)$(HOSTCC) $(LDFLAGS) -o $@ $< + $(QUIET_LINK)$(HOSTCC) $(HOSTLDFLAGS) -o $@ $< FORCE: diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index f0c6f54a8b2f..3040830d7797 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -248,7 +248,7 @@ void print_usage(void) "Capture, convert and output data from IIO device buffer\n" " -a Auto-activate all available channels\n" " -A Force-activate ALL channels\n" - " -c <n> Do n conversions\n" + " -c <n> Do n conversions, or loop forever if n < 0\n" " -e Disable wait for event (new data)\n" " -g Use trigger-less mode\n" " -l <n> Set buffer length to n samples\n" @@ -330,11 +330,14 @@ static const struct option longopts[] = { int main(int argc, char **argv) { - unsigned long num_loops = 2; + unsigned long long num_loops = 2; unsigned long timedelay = 1000000; unsigned long buf_len = 128; - int ret, c, i, j, toread; + ssize_t i; + unsigned long long j; + unsigned long toread; + int ret, c; int fp = -1; int num_channels = 0; @@ -366,7 +369,7 @@ int main(int argc, char **argv) break; case 'c': errno = 0; - num_loops = strtoul(optarg, &dummy, 10); + num_loops = strtoll(optarg, &dummy, 10); if (errno) { ret = -errno; goto error; @@ -634,7 +637,7 @@ int main(int argc, char **argv) goto error; } - for (j = 0; j < num_loops; j++) { + for (j = 0; j < num_loops || num_loops < 0; j++) { if (!noevents) { struct pollfd pfd = { .fd = fp, diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index a3a4427441bf..70fe61295733 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -21,6 +21,9 @@ /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#ifndef __pure +#define __pure __attribute__((pure)) +#endif #define noinline __attribute__((noinline)) #ifndef __packed #define __packed __attribute__((packed)) diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index c5e512da8d8a..af55acf73e75 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -263,6 +263,16 @@ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) +/* Relative call */ + +#define BPF_CALL_REL(TGT) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_CALL, \ + .dst_reg = 0, \ + .src_reg = BPF_PSEUDO_CALL, \ + .off = 0, \ + .imm = TGT }) + /* Program exit */ #define BPF_EXIT_INSN() \ diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..8dd6aefdafa4 --- /dev/null +++ b/tools/include/uapi/asm/bitsperlong.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/uapi/asm/bitsperlong.h" +#elif defined(__aarch64__) +#include "../../arch/arm64/include/uapi/asm/bitsperlong.h" +#elif defined(__powerpc__) +#include "../../arch/powerpc/include/uapi/asm/bitsperlong.h" +#elif defined(__s390__) +#include "../../arch/s390/include/uapi/asm/bitsperlong.h" +#elif defined(__sparc__) +#include "../../arch/sparc/include/uapi/asm/bitsperlong.h" +#elif defined(__mips__) +#include "../../arch/mips/include/uapi/asm/bitsperlong.h" +#elif defined(__ia64__) +#include "../../arch/ia64/include/uapi/asm/bitsperlong.h" +#else +#include <asm-generic/bitsperlong.h> +#endif diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h new file mode 100644 index 000000000000..ce3c5945a1c4 --- /dev/null +++ b/tools/include/uapi/asm/errno.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/uapi/asm/errno.h" +#elif defined(__powerpc__) +#include "../../arch/powerpc/include/uapi/asm/errno.h" +#elif defined(__sparc__) +#include "../../arch/sparc/include/uapi/asm/errno.h" +#elif defined(__alpha__) +#include "../../arch/alpha/include/uapi/asm/errno.h" +#elif defined(__mips__) +#include "../../arch/mips/include/uapi/asm/errno.h" +#elif defined(__ia64__) +#include "../../arch/ia64/include/uapi/asm/errno.h" +#elif defined(__xtensa__) +#include "../../arch/xtensa/include/uapi/asm/errno.h" +#else +#include <asm-generic/errno.h> +#endif diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 6fdff5945c8a..9c660e1688ab 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -680,6 +680,13 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_ATOMIC 3 +/** + * DRM_CLIENT_CAP_ASPECT_RATIO + * + * If set to 1, the DRM core will provide aspect ratio information in modes. + */ +#define DRM_CLIENT_CAP_ASPECT_RATIO 4 + /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c5ec89732a8d..59b19b6a40d7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -95,6 +95,9 @@ enum bpf_cmd { BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, }; enum bpf_map_type { @@ -115,6 +118,8 @@ enum bpf_map_type { BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, BPF_MAP_TYPE_CPUMAP, + BPF_MAP_TYPE_XSKMAP, + BPF_MAP_TYPE_SOCKHASH, }; enum bpf_prog_type { @@ -137,6 +142,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_RAW_TRACEPOINT, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_PROG_TYPE_LWT_SEG6LOCAL, + BPF_PROG_TYPE_LIRC_MODE2, }; enum bpf_attach_type { @@ -154,6 +161,9 @@ enum bpf_attach_type { BPF_CGROUP_INET6_CONNECT, BPF_CGROUP_INET4_POST_BIND, BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + BPF_LIRC_MODE2, __MAX_BPF_ATTACH_TYPE }; @@ -279,6 +289,9 @@ union bpf_attr { */ char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -339,6 +352,7 @@ union bpf_attr { __u32 start_id; __u32 prog_id; __u32 map_id; + __u32 btf_id; }; __u32 next_id; __u32 open_flags; @@ -363,398 +377,1704 @@ union bpf_attr { __u64 name; __u32 prog_fd; } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; + + struct { + __u32 pid; /* input: pid */ + __u32 fd; /* input: fd */ + __u32 flags; /* input: flags */ + __u32 buf_len; /* input/output: buf len */ + __aligned_u64 buf; /* input/output: + * tp_name for tracepoint + * symbol for kprobe + * filename for uprobe + */ + __u32 prog_id; /* output: prod_id */ + __u32 fd_type; /* output: BPF_FD_TYPE_* */ + __u64 probe_offset; /* output: probe_offset */ + __u64 probe_addr; /* output: probe_addr */ + } task_fd_query; } __attribute__((aligned(8))); -/* BPF helper function descriptions: - * - * void *bpf_map_lookup_elem(&map, &key) - * Return: Map value or NULL - * - * int bpf_map_update_elem(&map, &key, &value, flags) - * Return: 0 on success or negative error - * - * int bpf_map_delete_elem(&map, &key) - * Return: 0 on success or negative error - * - * int bpf_probe_read(void *dst, int size, void *src) - * Return: 0 on success or negative error +/* The description below is an attempt at providing documentation to eBPF + * developers about the multiple available eBPF helper functions. It can be + * parsed and used to produce a manual page. The workflow is the following, + * and requires the rst2man utility: + * + * $ ./scripts/bpf_helpers_doc.py \ + * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst + * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 + * $ man /tmp/bpf-helpers.7 + * + * Note that in order to produce this external documentation, some RST + * formatting is used in the descriptions to get "bold" and "italics" in + * manual pages. Also note that the few trailing white spaces are + * intentional, removing them would break paragraphs for rst2man. + * + * Start of BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + * Description + * Perform a lookup in *map* for an entry associated to *key*. + * Return + * Map value associated to *key*, or **NULL** if no entry was + * found. + * + * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * Description + * Add or update the value of the entry associated to *key* in + * *map* with *value*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * Flag value **BPF_NOEXIST** cannot be used for maps of types + * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all + * elements always exist), the helper would return an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * Description + * Delete entry with *key* from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read(void *dst, u32 size, const void *src) + * Description + * For tracing programs, safely attempt to read *size* bytes from + * address *src* and store the data in *dst*. + * Return + * 0 on success, or a negative error in case of failure. * * u64 bpf_ktime_get_ns(void) - * Return: current ktime - * - * int bpf_trace_printk(const char *fmt, int fmt_size, ...) - * Return: length of buffer written or negative error - * - * u32 bpf_prandom_u32(void) - * Return: random value - * - * u32 bpf_raw_smp_processor_id(void) - * Return: SMP processor ID - * - * int bpf_skb_store_bytes(skb, offset, from, len, flags) - * store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_l3_csum_replace(skb, offset, from, to, flags) - * recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_l4_csum_replace(skb, offset, from, to, flags) - * recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_tail_call(ctx, prog_array_map, index) - * jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: 32-bit index inside array that selects specific program to run - * Return: 0 on success or negative error - * - * int bpf_clone_redirect(skb, ifindex, flags) - * redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success or negative error + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Return + * Current *ktime*. + * + * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * Description + * This helper is a "printk()-like" facility for debugging. It + * prints a message defined by format *fmt* (of size *fmt_size*) + * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * available. It can take up to three additional **u64** + * arguments (as an eBPF helpers, the total number of arguments is + * limited to five). + * + * Each time the helper is called, it appends a line to the trace. + * The format of the trace is customizable, and the exact output + * one will get depends on the options set in + * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *README* file under the same directory). However, it usually + * defaults to something like: + * + * :: + * + * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> + * + * In the above: + * + * * ``telnet`` is the name of the current task. + * * ``470`` is the PID of the current task. + * * ``001`` is the CPU number on which the task is + * running. + * * In ``.N..``, each character refers to a set of + * options (whether irqs are enabled, scheduling + * options, whether hard/softirqs are running, level of + * preempt_disabled respectively). **N** means that + * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** + * are set. + * * ``419421.045894`` is a timestamp. + * * ``0x00000001`` is a fake value used by BPF for the + * instruction pointer register. + * * ``<formatted msg>`` is the message formatted with + * *fmt*. + * + * The conversion specifiers supported by *fmt* are similar, but + * more limited than for printk(). They are **%d**, **%i**, + * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, + * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size + * of field, padding with zeroes, etc.) is available, and the + * helper will return **-EINVAL** (but print nothing) if it + * encounters an unknown specifier. + * + * Also, note that **bpf_trace_printk**\ () is slow, and should + * only be used for debugging purposes. For this reason, a notice + * bloc (spanning several lines) is printed to kernel logs and + * states that the helper should not be used "for production use" + * the first time this helper is used (or more precisely, when + * **trace_printk**\ () buffers are allocated). For passing values + * to user space, perf events should be preferred. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * u32 bpf_get_prandom_u32(void) + * Description + * Get a pseudo-random number. + * + * From a security point of view, this helper uses its own + * pseudo-random internal state, and cannot be used to infer the + * seed of other random functions in the kernel. However, it is + * essential to note that the generator used by the helper is not + * cryptographically secure. + * Return + * A random 32-bit unsigned value. + * + * u32 bpf_get_smp_processor_id(void) + * Description + * Get the SMP (symmetric multiprocessing) processor id. Note that + * all programs run with preemption disabled, which means that the + * SMP processor id is stable during all the execution of the + * program. + * Return + * The SMP id of the processor running the program. + * + * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. *flags* are a combination of + * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the + * checksum for the packet after storing the bytes) and + * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ + * **->swhash** and *skb*\ **->l4hash** to 0). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * Description + * Recompute the layer 3 (e.g. IP) checksum for the packet + * associated to *skb*. Computation is incremental, so the helper + * must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored in *size*. + * Alternatively, it is possible to store the difference between + * the previous and the new values of the header field in *to*, by + * setting *from* and *size* to 0. For both methods, *offset* + * indicates the location of the IP checksum within the packet. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * Description + * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the + * packet associated to *skb*. Computation is incremental, so the + * helper must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored on the lowest + * four bits of *flags*. Alternatively, it is possible to store + * the difference between the previous and the new values of the + * header field in *to*, by setting *from* and the four lowest + * bits of *flags* to 0. For both methods, *offset* indicates the + * location of the IP checksum within the packet. In addition to + * the size of the field, *flags* can be added (bitwise OR) actual + * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left + * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and + * for updates resulting in a null checksum the value is set to + * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates + * the checksum is to be computed against a pseudo-header. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * Description + * This special helper is used to trigger a "tail call", or in + * other words, to jump into another eBPF program. The same stack + * frame is used (but values on stack and in registers for the + * caller are not accessible to the callee). This mechanism allows + * for program chaining, either for raising the maximum number of + * available eBPF instructions, or to execute given programs in + * conditional blocks. For security reasons, there is an upper + * limit to the number of successive tail calls that can be + * performed. + * + * Upon call of this helper, the program attempts to jump into a + * program referenced at index *index* in *prog_array_map*, a + * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes + * *ctx*, a pointer to the context. + * + * If the call succeeds, the kernel immediately runs the first + * instruction of the new program. This is not a function call, + * and it never returns to the previous program. If the call + * fails, then the helper has no effect, and the caller continues + * to run its subsequent instructions. A call can fail if the + * destination program for the jump does not exist (i.e. *index* + * is superior to the number of entries in *prog_array_map*), or + * if the maximum number of tail calls has been reached for this + * chain of programs. This limit is defined in the kernel by the + * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), + * which is currently set to 32. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * Description + * Clone and redirect the packet associated to *skb* to another + * net device of index *ifindex*. Both ingress and egress + * interfaces can be used for redirection. The **BPF_F_INGRESS** + * value in *flags* is used to make the distinction (ingress path + * is selected if the flag is present, egress path otherwise). + * This is the only flag supported for now. + * + * In comparison with **bpf_redirect**\ () helper, + * **bpf_clone_redirect**\ () has the associated cost of + * duplicating the packet buffer, but this can be executed out of + * the eBPF program. Conversely, **bpf_redirect**\ () is more + * efficient, but it is handled through an action code where the + * redirection happens only after the eBPF program has returned. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid + * Return + * A 64-bit integer containing the current tgid and pid, and + * created as such: + * *current_task*\ **->tgid << 32 \|** + * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - * - * int bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success or negative error - * - * u32 bpf_get_cgroup_classid(skb) - * retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - * - * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) - * Return: 0 on success or negative error - * - * int bpf_skb_vlan_pop(skb) - * Return: 0 on success or negative error - * - * int bpf_skb_get_tunnel_key(skb, key, size, flags) - * int bpf_skb_set_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Return: 0 on success or negative error - * - * u64 bpf_perf_event_read(map, flags) - * read perf event counter value - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * Return: value of perf event counter read or error code - * - * int bpf_redirect(ifindex, flags) - * redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: - * cls_bpf: - * bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * xdp_bpf: - * all bits - reserved - * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error - * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error - * int bpf_redirect_map(map, key, flags) - * redirect to endpoint in map - * @map: pointer to dev map - * @key: index in map to lookup - * @flags: -- - * Return: XDP_REDIRECT on success or XDP_ABORT on error - * - * u32 bpf_get_route_realm(skb) - * retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - * - * int bpf_perf_event_output(ctx, map, flags, data, size) - * output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success or negative error - * - * int bpf_get_stackid(ctx, map, flags) - * walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - * - * s64 bpf_csum_diff(from, from_size, to, to_size, seed) - * calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result or negative error code - * - * int bpf_skb_get_tunnel_opt(skb, opt, size) - * retrieve tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: option size - * - * int bpf_skb_set_tunnel_opt(skb, opt, size) - * populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success or negative error - * - * int bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is v4 -> v6, - * v6 -> v4 transitions. The helper will also resize the skb. eBPF - * program is expected to fill the new headers via skb_store_bytes - * and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - * - * int bpf_skb_change_type(skb, type) - * Change packet type of skb. - * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - * - * int bpf_skb_under_cgroup(skb, map, index) - * Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - * - * u32 bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash + * Return + * A 64-bit integer containing the current GID and UID, and + * created as such: *current_gid* **<< 32 \|** *current_uid*. + * + * int bpf_get_current_comm(char *buf, u32 size_of_buf) + * Description + * Copy the **comm** attribute of the current task into *buf* of + * *size_of_buf*. The **comm** attribute contains the name of + * the executable (excluding the path) for the current task. The + * *size_of_buf* must be strictly positive. On success, the + * helper makes sure that the *buf* is NUL-terminated. On failure, + * it is filled with zeroes. + * Return + * 0 on success, or a negative error in case of failure. + * + * u32 bpf_get_cgroup_classid(struct sk_buff *skb) + * Description + * Retrieve the classid for the current task, i.e. for the net_cls + * cgroup to which *skb* belongs. + * + * This helper can be used on TC egress path, but not on ingress. + * + * The net_cls cgroup provides an interface to tag network packets + * based on a user-provided identifier for all traffic coming from + * the tasks belonging to the related cgroup. See also the related + * kernel documentation, available from the Linux sources in file + * *Documentation/cgroup-v1/net_cls.txt*. + * + * The Linux kernel has two versions for cgroups: there are + * cgroups v1 and cgroups v2. Both are available to users, who can + * use a mixture of them, but note that the net_cls cgroup is for + * cgroup v1 only. This makes it incompatible with BPF programs + * run on cgroups, which is a cgroup-v2-only feature (a socket can + * only hold data for one version of cgroups at a time). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to + * "**y**" or to "**m**". + * Return + * The classid, or 0 for the default unconfigured classid. + * + * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * Description + * Push a *vlan_tci* (VLAN tag control information) of protocol + * *vlan_proto* to the packet associated to *skb*, then update + * the checksum. Note that if *vlan_proto* is different from + * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to + * be **ETH_P_8021Q**. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_vlan_pop(struct sk_buff *skb) + * Description + * Pop a VLAN header from the packet associated to *skb*. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Get tunnel metadata. This helper takes a pointer *key* to an + * empty **struct bpf_tunnel_key** of **size**, that will be + * filled with tunnel metadata for the packet associated to *skb*. + * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which + * indicates that the tunnel is based on IPv6 protocol instead of + * IPv4. + * + * The **struct bpf_tunnel_key** is an object that generalizes the + * principal parameters used by various tunneling protocols into a + * single struct. This way, it can be used to easily make a + * decision based on the contents of the encapsulation header, + * "summarized" in this struct. In particular, it holds the IP + * address of the remote end (IPv4 or IPv6, depending on the case) + * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, + * this struct exposes the *key*\ **->tunnel_id**, which is + * generally mapped to a VNI (Virtual Network Identifier), making + * it programmable together with the **bpf_skb_set_tunnel_key**\ + * () helper. + * + * Let's imagine that the following code is part of a program + * attached to the TC ingress interface, on one end of a GRE + * tunnel, and is supposed to filter out all messages coming from + * remote ends with IPv4 address other than 10.0.0.1: + * + * :: + * + * int ret; + * struct bpf_tunnel_key key = {}; + * + * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + * if (ret < 0) + * return TC_ACT_SHOT; // drop packet + * + * if (key.remote_ipv4 != 0x0a000001) + * return TC_ACT_SHOT; // drop packet + * + * return TC_ACT_OK; // accept packet + * + * This interface can also be used with all encapsulation devices + * that can operate in "collect metadata" mode: instead of having + * one network device per specific configuration, the "collect + * metadata" mode only requires a single device where the + * configuration can be extracted from this helper. + * + * This can be used together with various tunnels such as VXLan, + * Geneve, GRE or IP in IP (IPIP). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Populate tunnel metadata for packet associated to *skb.* The + * tunnel metadata is set to the contents of *key*, of *size*. The + * *flags* can be set to a combination of the following values: + * + * **BPF_F_TUNINFO_IPV6** + * Indicate that the tunnel is based on IPv6 protocol + * instead of IPv4. + * **BPF_F_ZERO_CSUM_TX** + * For IPv4 packets, add a flag to tunnel metadata + * indicating that checksum computation should be skipped + * and checksum set to zeroes. + * **BPF_F_DONT_FRAGMENT** + * Add a flag to tunnel metadata indicating that the + * packet should not be fragmented. + * **BPF_F_SEQ_NUMBER** + * Add a flag to tunnel metadata indicating that a + * sequence number should be added to tunnel header before + * sending the packet. This flag was added for GRE + * encapsulation, but might be used with other protocols + * as well in the future. + * + * Here is a typical usage on the transmit path: + * + * :: + * + * struct bpf_tunnel_key key; + * populate key ... + * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); + * + * See also the description of the **bpf_skb_get_tunnel_key**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) + * Description + * Read the value of a perf event counter. This helper relies on a + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of + * the perf event counter is selected when *map* is updated with + * perf event file descriptors. The *map* is an array whose size + * is the number of available CPUs, and each cell contains a value + * relative to one CPU. The value to retrieve is indicated by + * *flags*, that contains the index of the CPU to look up, masked + * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * Note that before Linux 4.13, only hardware perf event can be + * retrieved. + * + * Also, be aware that the newer helper + * **bpf_perf_event_read_value**\ () is recommended over + * **bpf_perf_event_read**\ () in general. The latter has some ABI + * quirks where error and counter value are used as a return code + * (which is wrong to do since ranges may overlap). This issue is + * fixed with **bpf_perf_event_read_value**\ (), which at the same + * time provides more features over the **bpf_perf_event_read**\ + * () interface. Please refer to the description of + * **bpf_perf_event_read_value**\ () for details. + * Return + * The value of the perf event counter read from the map, or a + * negative error code in case of failure. + * + * int bpf_redirect(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_clone_redirect**\ + * (), except that the packet is not cloned, which provides + * increased performance. + * + * Except for XDP, both ingress and egress interfaces can be used + * for redirection. The **BPF_F_INGRESS** value in *flags* is used + * to make the distinction (ingress path is selected if the flag + * is present, egress path otherwise). Currently, XDP only + * supports redirection to the egress interface, and accepts no + * flag at all. + * + * The same effect can be attained with the more generic + * **bpf_redirect_map**\ (), which requires specific maps to be + * used but offers better performance. + * Return + * For XDP, the helper returns **XDP_REDIRECT** on success or + * **XDP_ABORTED** on error. For other program types, the values + * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on + * error. + * + * u32 bpf_get_route_realm(struct sk_buff *skb) + * Description + * Retrieve the realm or the route, that is to say the + * **tclassid** field of the destination for the *skb*. The + * indentifier retrieved is a user-provided tag, similar to the + * one used with the net_cls cgroup (see description for + * **bpf_get_cgroup_classid**\ () helper), but here this tag is + * held by a route (a destination entry), not by a task. + * + * Retrieving this identifier works with the clsact TC egress hook + * (see also **tc-bpf(8)**), or alternatively on conventional + * classful egress qdiscs, but not on TC ingress path. In case of + * clsact TC egress hook, this has the advantage that, internally, + * the destination entry has not been dropped yet in the transmit + * path. Therefore, the destination entry does not need to be + * artificially held via **netif_keep_dst**\ () for a classful + * qdisc until the *skb* is freed. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_IP_ROUTE_CLASSID** configuration option. + * Return + * The realm of the route for the packet associated to *skb*, or 0 + * if none was found. + * + * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * The context of the program *ctx* needs also be passed to the + * helper. + * + * On user space, a program willing to read the values needs to + * call **perf_event_open**\ () on the perf event (either for + * one or for all CPUs) and to store the file descriptor into the + * *map*. This must be done before the eBPF program can send data + * into it. An example is available in file + * *samples/bpf/trace_output_user.c* in the Linux kernel source + * tree (the eBPF program counterpart is in + * *samples/bpf/trace_output_kern.c*). + * + * **bpf_perf_event_output**\ () achieves better performance + * than **bpf_trace_printk**\ () for sharing data with user + * space, and is much better suitable for streaming data from eBPF + * programs. + * + * Note that this helper is not restricted to tracing use cases + * and can be used with programs attached to TC or XDP as well, + * where it allows for passing data to user space listeners. Data + * can be: + * + * * Only custom structs, + * * Only the packet payload, or + * * A combination of both. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) + * Description + * This helper was provided as an easy way to load data from a + * packet. It can be used to load *len* bytes from *offset* from + * the packet associated to *skb*, into the buffer pointed by + * *to*. + * + * Since Linux 4.7, usage of this helper has mostly been replaced + * by "direct packet access", enabling packet data to be + * manipulated with *skb*\ **->data** and *skb*\ **->data_end** + * pointing respectively to the first byte of packet data and to + * the byte after the last byte of packet data. However, it + * remains useful if one wishes to read large quantities of data + * at once from a packet into the eBPF stack. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags) + * Description + * Walk a user or a kernel stack and return its id. To achieve + * this, the helper needs *ctx*, which is a pointer to the context + * on which the tracing program is executed, and a pointer to a + * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * a combination of the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_FAST_STACK_CMP** + * Compare stacks by hash only. + * **BPF_F_REUSE_STACKID** + * If two different stacks hash into the same *stackid*, + * discard the old one. + * + * The stack id retrieved is a 32 bit long integer handle which + * can be further combined with other data (including other stack + * ids) and used as a key into maps. This can be useful for + * generating a variety of graphs (such as flame graphs or off-cpu + * graphs). + * + * For walking a stack, this helper is an improvement over + * **bpf_probe_read**\ (), which can be used with unrolled loops + * but is not efficient and consumes a lot of eBPF instructions. + * Instead, **bpf_get_stackid**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * The positive or null stack id on success, or a negative error + * in case of failure. + * + * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) + * Description + * Compute a checksum difference, from the raw buffer pointed by + * *from*, of length *from_size* (that must be a multiple of 4), + * towards the raw buffer pointed by *to*, of size *to_size* + * (same remark). An optional *seed* can be added to the value + * (this can be cascaded, the seed may come from a previous call + * to the helper). + * + * This is flexible enough to be used in several ways: + * + * * With *from_size* == 0, *to_size* > 0 and *seed* set to + * checksum, it can be used when pushing new data. + * * With *from_size* > 0, *to_size* == 0 and *seed* set to + * checksum, it can be used when removing data from a packet. + * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it + * can be used to compute a diff. Note that *from_size* and + * *to_size* do not need to be equal. + * + * This helper can be used in combination with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to + * which one can feed in the difference computed with + * **bpf_csum_diff**\ (). + * Return + * The checksum result, or a negative error code in case of + * failure. + * + * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * Description + * Retrieve tunnel options metadata for the packet associated to + * *skb*, and store the raw tunnel option data to the buffer *opt* + * of *size*. + * + * This helper can be used with encapsulation devices that can + * operate in "collect metadata" mode (please refer to the related + * note in the description of **bpf_skb_get_tunnel_key**\ () for + * more details). A particular example where this can be used is + * in combination with the Geneve encapsulation protocol, where it + * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) + * and retrieving arbitrary TLVs (Type-Length-Value headers) from + * the eBPF program. This allows for full customization of these + * headers. + * Return + * The size of the option data retrieved. + * + * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * Description + * Set tunnel options metadata for the packet associated to *skb* + * to the option data contained in the raw buffer *opt* of *size*. + * + * See also the description of the **bpf_skb_get_tunnel_opt**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * Description + * Change the protocol of the *skb* to *proto*. Currently + * supported are transition from IPv4 to IPv6, and from IPv6 to + * IPv4. The helper takes care of the groundwork for the + * transition, including resizing the socket buffer. The eBPF + * program is expected to fill the new headers, if any, via + * **skb_store_bytes**\ () and to recompute the checksums with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ + * (). The main case for this helper is to perform NAT64 + * operations out of an eBPF program. + * + * Internally, the GSO type is marked as dodgy so that headers are + * checked and segments are recalculated by the GSO/GRO engine. + * The size for GSO target is adapted as well. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * Description + * Change the packet type for the packet associated to *skb*. This + * comes down to setting *skb*\ **->pkt_type** to *type*, except + * the eBPF program does not have a write access to *skb*\ + * **->pkt_type** beside this helper. Using a helper here allows + * for graceful handling of errors. + * + * The major use case is to change incoming *skb*s to + * **PACKET_HOST** in a programmatic way instead of having to + * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for + * example. + * + * Note that *type* only allows certain values. At this time, they + * are: + * + * **PACKET_HOST** + * Packet is for us. + * **PACKET_BROADCAST** + * Send packet to all. + * **PACKET_MULTICAST** + * Send packet to group. + * **PACKET_OTHERHOST** + * Send packet to someone else. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * Description + * Check whether *skb* is a descendant of the cgroup2 held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* failed the cgroup2 descendant test. + * * 1, if the *skb* succeeded the cgroup2 descendant test. + * * A negative error code, if an error occurred. + * + * u32 bpf_get_hash_recalc(struct sk_buff *skb) + * Description + * Retrieve the hash of the packet, *skb*\ **->hash**. If it is + * not set, in particular if the hash was cleared due to mangling, + * recompute this hash. Later accesses to the hash can be done + * directly with *skb*\ **->hash**. + * + * Calling **bpf_set_hash_invalid**\ (), changing a packet + * prototype with **bpf_skb_change_proto**\ (), or calling + * **bpf_skb_store_bytes**\ () with the + * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear + * the hash and to trigger a new computation for the next call to + * **bpf_get_hash_recalc**\ (). + * Return + * The 32-bit hash. * * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - * - * int bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - * - * int bpf_current_task_under_cgroup(map, index) - * Check cgroup2 membership of current task - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 current failed the cgroup2 descendant test - * == 1 current succeeded the cgroup2 descendant test - * < 0 error - * - * int bpf_skb_change_tail(skb, len, flags) - * The helper will resize the skb to the given new size, to be used f.e. - * with control messages. - * @skb: pointer to skb - * @len: new skb length - * @flags: reserved - * Return: 0 on success or negative error - * - * int bpf_skb_pull_data(skb, len) - * The helper will pull in non-linear data in case the skb is non-linear - * and not all of len are part of the linear section. Only needed for - * read/write with direct packet access. - * @skb: pointer to skb - * @len: len to make read/writeable - * Return: 0 on success or negative error - * - * s64 bpf_csum_update(skb, csum) - * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. - * @skb: pointer to skb - * @csum: csum to add - * Return: csum on success or negative error - * - * void bpf_set_hash_invalid(skb) - * Invalidate current skb->hash. - * @skb: pointer to skb - * - * int bpf_get_numa_node_id() - * Return: Id of current NUMA node. - * - * int bpf_skb_change_head() - * Grows headroom of skb and adjusts MAC header offset accordingly. - * Will extends/reallocae as required automatically. - * May change skb data pointer and will thus invalidate any check - * performed for direct packet access. - * @skb: pointer to skb - * @len: length of header to be pushed in front - * @flags: Flags (unused for now) - * Return: 0 on success or negative error - * - * int bpf_xdp_adjust_head(xdp_md, delta) - * Adjust the xdp_md.data by delta - * @xdp_md: pointer to xdp_md - * @delta: An positive/negative integer to be added to xdp_md.data - * Return: 0 on success or negative on error + * Return + * A pointer to the current task struct. + * + * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * Description + * Attempt in a safe way to write *len* bytes from the buffer + * *src* to *dst* in memory. It only works for threads that are in + * user context, and *dst* must be a valid user space address. + * + * This helper should not be used to implement any kind of + * security mechanism because of TOC-TOU attacks, but rather to + * debug, divert, and manipulate execution of semi-cooperative + * processes. + * + * Keep in mind that this feature is meant for experiments, and it + * has a risk of crashing the system and running programs. + * Therefore, when an eBPF program using this helper is attached, + * a warning including PID and process name is printed to kernel + * logs. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * Description + * Check whether the probe is being run is the context of a given + * subset of the cgroup2 hierarchy. The cgroup2 to test is held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* task belongs to the cgroup2. + * * 1, if the *skb* task does not belong to the cgroup2. + * * A negative error code, if an error occurred. + * + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Resize (trim or grow) the packet associated to *skb* to the + * new *len*. The *flags* are reserved for future usage, and must + * be left at zero. + * + * The basic idea is that the helper performs the needed work to + * change the size of the packet, then the eBPF program rewrites + * the rest via helpers like **bpf_skb_store_bytes**\ (), + * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () + * and others. This helper is a slow path utility intended for + * replies with control messages. And because it is targeted for + * slow path, the helper itself can afford to be slow: it + * implicitly linearizes, unclones and drops offloads from the + * *skb*. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * Description + * Pull in non-linear data in case the *skb* is non-linear and not + * all of *len* are part of the linear section. Make *len* bytes + * from *skb* readable and writable. If a zero value is passed for + * *len*, then the whole length of the *skb* is pulled. + * + * This helper is only needed for reading and writing with direct + * packet access. + * + * For direct packet access, testing that offsets to access + * are within packet boundaries (test on *skb*\ **->data_end**) is + * susceptible to fail if offsets are invalid, or if the requested + * data is in non-linear parts of the *skb*. On failure the + * program can just bail out, or in the case of a non-linear + * buffer, use a helper to make the data available. The + * **bpf_skb_load_bytes**\ () helper is a first solution to access + * the data. Another one consists in using **bpf_skb_pull_data** + * to pull in once the non-linear parts, then retesting and + * eventually access the data. + * + * At the same time, this also makes sure the *skb* is uncloned, + * which is a necessary condition for direct write. As this needs + * to be an invariant for the write part only, the verifier + * detects writes and adds a prologue that is calling + * **bpf_skb_pull_data()** to effectively unclone the *skb* from + * the very beginning in case it is indeed cloned. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) + * Description + * Add the checksum *csum* into *skb*\ **->csum** in case the + * driver has supplied a checksum for the entire packet into that + * field. Return an error otherwise. This helper is intended to be + * used in combination with **bpf_csum_diff**\ (), in particular + * when the checksum needs to be updated after data has been + * written into the packet through direct packet access. + * Return + * The checksum on success, or a negative error code in case of + * failure. + * + * void bpf_set_hash_invalid(struct sk_buff *skb) + * Description + * Invalidate the current *skb*\ **->hash**. It can be used after + * mangling on headers through direct packet access, in order to + * indicate that the hash is outdated and to trigger a + * recalculation the next time the kernel tries to access this + * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * + * int bpf_get_numa_node_id(void) + * Description + * Return the id of the current NUMA node. The primary use case + * for this helper is the selection of sockets for the local NUMA + * node, when the program is attached to sockets using the + * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), + * but the helper is also available to other eBPF program types, + * similarly to **bpf_get_smp_processor_id**\ (). + * Return + * The id of current NUMA node. + * + * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Grows headroom of packet associated to *skb* and adjusts the + * offset of the MAC header accordingly, adding *len* bytes of + * space. It automatically extends and reallocates memory as + * required. + * + * This helper can be used on a layer 3 *skb* to push a MAC header + * for redirection into a layer 2 device. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that + * it is possible to use a negative value for *delta*. This helper + * can be used to prepare the packet for pushing or popping + * headers. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. * * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) - * Copy a NUL terminated string from unsafe address. In case the string - * length is smaller than size, the target is not padded with further NUL - * bytes. In case the string length is larger than size, just count-1 - * bytes are copied and the last byte is set to NUL. - * @dst: destination address - * @size: maximum number of bytes to copy, including the trailing NUL - * @unsafe_ptr: unsafe address - * Return: - * > 0 length of the string including the trailing NUL on success - * < 0 error - * - * u64 bpf_get_socket_cookie(skb) - * Get the cookie for the socket stored inside sk_buff. - * @skb: pointer to skb - * Return: 8 Bytes non-decreasing number on success or 0 if the socket - * field is missing inside sk_buff - * - * u32 bpf_get_socket_uid(skb) - * Get the owner uid of the socket stored inside sk_buff. - * @skb: pointer to skb - * Return: uid of the socket owner on success or overflowuid if failed. - * - * u32 bpf_set_hash(skb, hash) - * Set full skb->hash. - * @skb: pointer to skb - * @hash: hash to set - * - * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) - * Calls setsockopt. Not all opts are available, only those with - * integer optvals plus TCP_CONGESTION. - * Supported levels: SOL_SOCKET and IPPROTO_TCP - * @bpf_socket: pointer to bpf_socket - * @level: SOL_SOCKET or IPPROTO_TCP - * @optname: option name - * @optval: pointer to option value - * @optlen: length of optval in bytes - * Return: 0 or negative error - * - * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) - * Calls getsockopt. Not all opts are available. - * Supported levels: IPPROTO_TCP - * @bpf_socket: pointer to bpf_socket - * @level: IPPROTO_TCP - * @optname: option name - * @optval: pointer to option value - * @optlen: length of optval in bytes - * Return: 0 or negative error - * - * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) - * Set callback flags for sock_ops - * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct - * @flags: flags value - * Return: 0 for no error - * -EINVAL if there is no full tcp socket - * bits in flags that are not supported by current kernel - * - * int bpf_skb_adjust_room(skb, len_diff, mode, flags) - * Grow or shrink room in sk_buff. - * @skb: pointer to skb - * @len_diff: (signed) amount of room to grow/shrink - * @mode: operation mode (enum bpf_adj_room_mode) - * @flags: reserved for future use - * Return: 0 on success or negative error code - * - * int bpf_sk_redirect_map(map, key, flags) - * Redirect skb to a sock in map using key as a lookup key for the - * sock in map. - * @map: pointer to sockmap - * @key: key to lookup sock in map - * @flags: reserved for future use - * Return: SK_PASS - * - * int bpf_sock_map_update(skops, map, key, flags) - * @skops: pointer to bpf_sock_ops - * @map: pointer to sockmap to update - * @key: key to insert/update sock in map - * @flags: same flags as map update elem - * - * int bpf_xdp_adjust_meta(xdp_md, delta) - * Adjust the xdp_md.data_meta by delta - * @xdp_md: pointer to xdp_md - * @delta: An positive/negative integer to be added to xdp_md.data_meta - * Return: 0 on success or negative on error - * - * int bpf_perf_event_read_value(map, flags, buf, buf_size) - * read perf event counter value and perf event enabled/running time - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * @buf: buf to fill - * @buf_size: size of the buf - * Return: 0 on success or negative error code - * - * int bpf_perf_prog_read_value(ctx, buf, buf_size) - * read perf prog attached perf event counter and enabled/running time - * @ctx: pointer to ctx - * @buf: buf to fill - * @buf_size: size of the buf - * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set - * - * int bpf_msg_redirect_map(map, key, flags) - * Redirect msg to a sock in map using key as a lookup key for the - * sock in map. - * @map: pointer to sockmap - * @key: key to lookup sock in map - * @flags: reserved for future use - * Return: SK_PASS - * - * int bpf_bind(ctx, addr, addr_len) - * Bind socket to address. Only binding to IP is supported, no port can be - * set in addr. - * @ctx: pointer to context of type bpf_sock_addr - * @addr: pointer to struct sockaddr to bind socket to - * @addr_len: length of sockaddr structure - * Return: 0 on success or negative error code + * Description + * Copy a NUL terminated string from an unsafe address + * *unsafe_ptr* to *dst*. The *size* should include the + * terminating NUL byte. In case the string length is smaller than + * *size*, the target is not padded with further NUL bytes. If the + * string length is larger than *size*, just *size*-1 bytes are + * copied and the last byte is set to NUL. + * + * On success, the length of the copied string is returned. This + * makes this helper useful in tracing programs for reading + * strings, and more importantly to get its length at runtime. See + * the following snippet: + * + * :: + * + * SEC("kprobe/sys_open") + * void bpf_sys_open(struct pt_regs *ctx) + * { + * char buf[PATHLEN]; // PATHLEN is defined to 256 + * int res = bpf_probe_read_str(buf, sizeof(buf), + * ctx->di); + * + * // Consume buf, for example push it to + * // userspace via bpf_perf_event_output(); we + * // can use res (the string length) as event + * // size, after checking its boundaries. + * } + * + * In comparison, using **bpf_probe_read()** helper here instead + * to read the string would require to estimate the length at + * compile time, and would often result in copying more memory + * than necessary. + * + * Another useful use case is when parsing individual process + * arguments or individual environment variables navigating + * *current*\ **->mm->arg_start** and *current*\ + * **->mm->env_start**: using this helper and the return value, + * one can quickly iterate at the right offset of the memory area. + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + * + * u64 bpf_get_socket_cookie(struct sk_buff *skb) + * Description + * If the **struct sk_buff** pointed by *skb* has a known socket, + * retrieve the cookie (generated by the kernel) of this socket. + * If no cookie has been set yet, generate a new cookie. Once + * generated, the socket cookie remains stable for the life of the + * socket. This helper can be useful for monitoring per socket + * networking traffic statistics as it provides a unique socket + * identifier per namespace. + * Return + * A 8-byte long non-decreasing number on success, or 0 if the + * socket field is missing inside *skb*. + * + * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Return + * The owner UID of the socket associated to *skb*. If the socket + * is **NULL**, or if it is not a full socket (i.e. if it is a + * time-wait or a request socket instead), **overflowuid** value + * is returned (note that **overflowuid** might also be the actual + * UID value for the socket). + * + * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * Description + * Set the full hash for *skb* (set the field *skb*\ **->hash**) + * to value *hash*. + * Return + * 0 + * + * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * Description + * Emulate a call to **setsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **setsockopt(2)** for more information. + * The option value of length *optlen* is pointed by *optval*. + * + * This helper actually implements a subset of **setsockopt()**. + * It supports the following *level*\ s: + * + * * **SOL_SOCKET**, which supports the following *optname*\ s: + * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * * **IPPROTO_TCP**, which supports the following *optname*\ s: + * **TCP_CONGESTION**, **TCP_BPF_IW**, + * **TCP_BPF_SNDCWND_CLAMP**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * Description + * Grow or shrink the room for data in the packet associated to + * *skb* by *len_diff*, and according to the selected *mode*. + * + * There is a single supported mode at this time: + * + * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer + * (room space is added or removed below the layer 3 header). + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the endpoint referenced by *map* at + * index *key*. Depending on its type, this *map* can contain + * references to net devices (for forwarding packets through other + * ports), or to CPUs (for redirecting XDP frames to another CPU; + * but this is only implemented for native XDP (with driver + * support) as of this writing). + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * When used to redirect packets to net devices, this helper + * provides a high performance increase over **bpf_redirect**\ (). + * This is due to various implementation details of the underlying + * mechanisms, one of which is the fact that **bpf_redirect_map**\ + * () tries to send packet as a "bulk" to the device. + * Return + * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * + * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a *map* referencing sockets. The + * *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust the address pointed by *xdp_md*\ **->data_meta** by + * *delta* (which can be positive or negative). Note that this + * operation modifies the address stored in *xdp_md*\ **->data**, + * so the latter must be loaded only after the helper has been + * called. + * + * The use of *xdp_md*\ **->data_meta** is optional and programs + * are not required to use it. The rationale is that when the + * packet is processed with XDP (e.g. as DoS filter), it is + * possible to push further meta data along with it before passing + * to the stack, and to give the guarantee that an ingress eBPF + * program attached as a TC classifier on the same device can pick + * this up for further post-processing. Since TC works with socket + * buffers, it remains possible to set from XDP the **mark** or + * **priority** pointers, or other pointers for the socket buffer. + * Having this scratch space generic and programmable allows for + * more flexibility as the user is free to store whatever meta + * data they need. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * Read the value of a perf event counter, and store it into *buf* + * of size *buf_size*. This helper relies on a *map* of type + * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event + * counter is selected when *map* is updated with perf event file + * descriptors. The *map* is an array whose size is the number of + * available CPUs, and each cell contains a value relative to one + * CPU. The value to retrieve is indicated by *flags*, that + * contains the index of the CPU to look up, masked with + * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * This helper behaves in a way close to + * **bpf_perf_event_read**\ () helper, save that instead of + * just returning the value observed, it fills the *buf* + * structure. This allows for additional data to be retrieved: in + * particular, the enabled and running times (in *buf*\ + * **->enabled** and *buf*\ **->running**, respectively) are + * copied. In general, **bpf_perf_event_read_value**\ () is + * recommended over **bpf_perf_event_read**\ (), which has some + * ABI issues and provides fewer functionalities. + * + * These values are interesting, because hardware PMU (Performance + * Monitoring Unit) counters are limited resources. When there are + * more PMU based perf events opened than available counters, + * kernel will multiplex these events so each event gets certain + * percentage (but not all) of the PMU time. In case that + * multiplexing happens, the number of samples or counter value + * will not reflect the case compared to when no multiplexing + * occurs. This makes comparison between different runs difficult. + * Typically, the counter value should be normalized before + * comparing to other experiments. The usual normalization is done + * as follows. + * + * :: + * + * normalized_counter = counter * t_enabled / t_running + * + * Where t_enabled is the time enabled for event and t_running is + * the time running for event since last normalization. The + * enabled and running times are accumulated since the perf event + * open. To achieve scaling factor between two invocations of an + * eBPF program, users can can use CPU id as the key (which is + * typical for perf array usage model) to remember the previous + * value and do the calculation inside the eBPF program. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * For en eBPF program attached to a perf event, retrieve the + * value of the event counter associated to *ctx* and store it in + * the structure pointed by *buf* and of size *buf_size*. Enabled + * and running times are also stored in the structure (see + * description of helper **bpf_perf_event_read_value**\ () for + * more details). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * Description + * Emulate a call to **getsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **getsockopt(2)** for more information. + * The retrieved value is stored in the structure pointed by + * *opval* and of length *optlen*. + * + * This helper actually implements a subset of **getsockopt()**. + * It supports the following *level*\ s: + * + * * **IPPROTO_TCP**, which supports *optname* + * **TCP_CONGESTION**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_override_return(struct pt_reg *regs, u64 rc) + * Description + * Used for error injection, this helper uses kprobes to override + * the return value of the probed function, and to set it to *rc*. + * The first argument is the context *regs* on which the kprobe + * works. + * + * This helper works by setting setting the PC (program counter) + * to an override function which is run in place of the original + * probed function. This means the probed function is not run at + * all. The replacement function just returns with the required + * value. + * + * This helper has security implications, and thus is subject to + * restrictions. It is only available if the kernel was compiled + * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration + * option, and in this case it only works on functions tagged with + * **ALLOW_ERROR_INJECTION** in the kernel code. + * + * Also, the helper is only available for the architectures having + * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, + * x86 architecture is the only one to support this feature. + * Return + * 0 + * + * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * Description + * Attempt to set the value of the **bpf_sock_ops_cb_flags** field + * for the full TCP socket associated to *bpf_sock_ops* to + * *argval*. + * + * The primary use of this field is to determine if there should + * be calls to eBPF programs of type + * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP + * code. A program of the same type can change its value, per + * connection and as necessary, when the connection is + * established. This field is directly accessible for reading, but + * this helper must be used for updates in order to return an + * error if an eBPF program tries to set a callback that is not + * supported in the current kernel. + * + * The supported callback values that *argval* can combine are: + * + * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) + * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) + * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * + * Here are some examples of where one could call such eBPF + * program: + * + * * When RTO fires. + * * When a packet is retransmitted. + * * When the connection terminates. + * * When a packet is sent. + * * When a packet is received. + * Return + * Code **-EINVAL** if the socket is not a full TCP socket; + * otherwise, a positive number containing the bits that could not + * be set is returned (which comes down to 0 if all bits were set + * as required). + * + * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, apply the verdict of the eBPF program to + * the next *bytes* (number of bytes) of message *msg*. + * + * For example, this helper can be used in the following cases: + * + * * A single **sendmsg**\ () or **sendfile**\ () system call + * contains multiple logical messages that the eBPF program is + * supposed to read and for which it should apply a verdict. + * * An eBPF program only cares to read the first *bytes* of a + * *msg*. If the message has a large payload, then setting up + * and calling the eBPF program repeatedly for all bytes, even + * though the verdict is already known, would create unnecessary + * overhead. + * + * When called from within an eBPF program, the helper sets a + * counter internal to the BPF infrastructure, that is used to + * apply the last verdict to the next *bytes*. If *bytes* is + * smaller than the current data being processed from a + * **sendmsg**\ () or **sendfile**\ () system call, the first + * *bytes* will be sent and the eBPF program will be re-run with + * the pointer for start of data pointing to byte number *bytes* + * **+ 1**. If *bytes* is larger than the current data being + * processed, then the eBPF verdict will be applied to multiple + * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are + * consumed. + * + * Note that if a socket closes with the internal counter holding + * a non-zero value, this is not a problem because data is not + * being buffered for *bytes* and is sent as it is received. + * Return + * 0 + * + * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, prevent the execution of the verdict eBPF + * program for message *msg* until *bytes* (byte number) have been + * accumulated. + * + * This can be used when one needs a specific number of bytes + * before a verdict can be assigned, even if the data spans + * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme + * case would be a user calling **sendmsg**\ () repeatedly with + * 1-byte long message segments. Obviously, this is bad for + * performance, but it is still valid. If the eBPF program needs + * *bytes* bytes to validate a header, this helper can be used to + * prevent the eBPF program to be called again until *bytes* have + * been accumulated. + * Return + * 0 + * + * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * Description + * For socket policies, pull in non-linear data from user space + * for *msg* and set pointers *msg*\ **->data** and *msg*\ + * **->data_end** to *start* and *end* bytes offsets into *msg*, + * respectively. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it can only parse data that the (**data**, **data_end**) + * pointers have already consumed. For **sendmsg**\ () hooks this + * is likely the first scatterlist element. But for calls relying + * on the **sendpage** handler (e.g. **sendfile**\ ()) this will + * be the range (**0**, **0**) because the data is shared with + * user space and by default the objective is to avoid allowing + * user space to modify data while (or after) eBPF verdict is + * being decided. This helper can be used to pull in data and to + * set the start and end pointer to given values. Data will be + * copied if necessary (i.e. if data was not linear and if start + * and end pointers do not point to the same chunk). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * Description + * Bind the socket associated to *ctx* to the address pointed by + * *addr*, of length *addr_len*. This allows for making outgoing + * connection from the desired IP address, which can be useful for + * example when all processes inside a cgroup should use one + * single IP address on a host that has multiple IP configured. + * + * This helper works for IPv4 and IPv6, TCP and UDP sockets. The + * domain (*addr*\ **->sa_family**) must be **AF_INET** (or + * **AF_INET6**). Looking for a free port to bind to can be + * expensive, therefore binding to port is not permitted by the + * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) + * must be set to zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is + * only possible to shrink the packet as of this writing, + * therefore *delta* must be a negative integer. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * Description + * Retrieve the XFRM state (IP transform framework, see also + * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. + * + * The retrieved value is stored in the **struct bpf_xfrm_state** + * pointed by *xfrm_state* and of length *size*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_XFRM** configuration option. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *ctx*, which is a pointer + * to the context on which the tracing program is executed. + * To store the stacktrace, the bpf program provides *buf* with + * a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + * + * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) + * Description + * This helper is similar to **bpf_skb_load_bytes**\ () in that + * it provides an easy way to load *len* bytes from *offset* + * from the packet associated to *skb*, into the buffer pointed + * by *to*. The difference to **bpf_skb_load_bytes**\ () is that + * a fifth argument *start_header* exists in order to select a + * base offset to start from. *start_header* can be one of: + * + * **BPF_HDR_START_MAC** + * Base offset to load data from is *skb*'s mac header. + * **BPF_HDR_START_NET** + * Base offset to load data from is *skb*'s network header. + * + * In general, "direct packet access" is the preferred method to + * access packet data, however, this helper is in particular useful + * in socket filters where *skb*\ **->data** does not always point + * to the start of the mac header and where "direct packet access" + * is not available. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * Description + * Do FIB lookup in kernel tables using parameters in *params*. + * If lookup is successful and result shows packet is to be + * forwarded, the neighbor tables are searched for the nexthop. + * If successful (ie., FIB lookup shows forwarding and nexthop + * is resolved), the nexthop address is returned in ipv4_dst + * or ipv6_dst based on family, smac is set to mac address of + * egress device, dmac is set to nexthop mac address, rt_metric + * is set to metric from route (IPv4/IPv6 only). + * + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return + * Egress device index on success, 0 if packet needs to continue + * up the stack for further processing or a negative error in case + * of failure. + * + * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a sockhash *map* referencing sockets. + * The *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. + * if the verdeict eBPF program returns **SK_PASS**), redirect it + * to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * Description + * Encapsulate the packet associated to *skb* within a Layer 3 + * protocol header. This header is provided in the buffer at + * address *hdr*, with *len* its size in bytes. *type* indicates + * the protocol of the header and can be one of: + * + * **BPF_LWT_ENCAP_SEG6** + * IPv6 encapsulation with Segment Routing Header + * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, + * the IPv6 header is computed by the kernel. + * **BPF_LWT_ENCAP_SEG6_INLINE** + * Only works if *skb* contains an IPv6 packet. Insert a + * Segment Routing Header (**struct ipv6_sr_hdr**) inside + * the IPv6 header. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. Only the flags, tag and TLVs + * inside the outermost IPv6 Segment Routing Header can be + * modified through this helper. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * Description + * Adjust the size allocated to TLVs in the outermost IPv6 + * Segment Routing Header contained in the packet associated to + * *skb*, at position *offset* by *delta* bytes. Only offsets + * after the segments are accepted. *delta* can be as well + * positive (growing) as negative (shrinking). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * Description + * Apply an IPv6 Segment Routing action of type *action* to the + * packet associated to *skb*. Each action takes a parameter + * contained at address *param*, and of length *param_len* bytes. + * *action* can be one of: + * + * **SEG6_LOCAL_ACTION_END_X** + * End.X action: Endpoint with Layer-3 cross-connect. + * Type of *param*: **struct in6_addr**. + * **SEG6_LOCAL_ACTION_END_T** + * End.T action: Endpoint with specific IPv6 table lookup. + * Type of *param*: **int**. + * **SEG6_LOCAL_ACTION_END_B6** + * End.B6 action: Endpoint bound to an SRv6 policy. + * Type of param: **struct ipv6_sr_hdr**. + * **SEG6_LOCAL_ACTION_END_B6_ENCAP** + * End.B6.Encap action: Endpoint bound to an SRv6 + * encapsulation policy. + * Type of param: **struct ipv6_sr_hdr**. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown** () again with the same values, or calling + * **bpf_rc_repeat** (). + * + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * + * Return + * 0 + * + * int bpf_rc_repeat(void *ctx) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. + * + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * + * Return + * 0 + * + * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * Description + * Return the cgroup v2 id of the socket associated with the *skb*. + * This is roughly similar to the **bpf_get_cgroup_classid**\ () + * helper for cgroup v1 by providing a tag resp. identifier that + * can be matched on or used for map lookups e.g. to implement + * policy. The cgroup v2 id of a given path in the hierarchy is + * exposed in user space through the f_handle API in order to get + * to the same 64-bit id. + * + * This helper can be used on TC egress path, but not on ingress, + * and is available only if the kernel was compiled with the + * **CONFIG_SOCK_CGROUP_DATA** configuration option. + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * u64 bpf_get_current_cgroup_id(void) + * Return + * A 64-bit integer containing the current cgroup id based + * on the cgroup within which the current task is running. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -821,7 +2141,23 @@ union bpf_attr { FN(msg_apply_bytes), \ FN(msg_cork_bytes), \ FN(msg_pull_data), \ - FN(bind), + FN(bind), \ + FN(xdp_adjust_tail), \ + FN(skb_get_xfrm_state), \ + FN(get_stack), \ + FN(skb_load_bytes_relative), \ + FN(fib_lookup), \ + FN(sock_hash_update), \ + FN(msg_redirect_hash), \ + FN(sk_redirect_hash), \ + FN(lwt_push_encap), \ + FN(lwt_seg6_store_bytes), \ + FN(lwt_seg6_adjust_srh), \ + FN(lwt_seg6_action), \ + FN(rc_repeat), \ + FN(rc_keydown), \ + FN(skb_cgroup_id), \ + FN(get_current_cgroup_id), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -855,11 +2191,14 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ #define BPF_F_TUNINFO_IPV6 (1ULL << 0) -/* BPF_FUNC_get_stackid flags. */ +/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ #define BPF_F_SKIP_FIELD_MASK 0xffULL #define BPF_F_USER_STACK (1ULL << 8) +/* flags used by BPF_FUNC_get_stackid only. */ #define BPF_F_FAST_STACK_CMP (1ULL << 9) #define BPF_F_REUSE_STACKID (1ULL << 10) +/* flags used by BPF_FUNC_get_stack only. */ +#define BPF_F_USER_BUILD_ID (1ULL << 11) /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) @@ -879,6 +2218,18 @@ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, }; +/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ +enum bpf_hdr_start_off { + BPF_HDR_START_MAC, + BPF_HDR_START_NET, +}; + +/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ +enum bpf_lwt_encap_mode { + BPF_LWT_ENCAP_SEG6, + BPF_LWT_ENCAP_SEG6_INLINE +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -923,10 +2274,24 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; - __u16 tunnel_ext; + __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; }; +/* user accessible mirror of in-kernel xfrm_state. + * new fields can only be added to the end of this structure + */ +struct bpf_xfrm_state { + __u32 reqid; + __u32 spi; /* Stored in network byte order */ + __u16 family; + __u16 ext; /* Padding, future use. */ + union { + __u32 remote_ipv4; /* Stored in network byte order */ + __u32 remote_ipv6[4]; /* Stored in network byte order */ + }; +}; + /* Generic BPF return codes which all BPF program types may support. * The values are binary compatible with their TC_ACT_* counter-part to * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT @@ -999,6 +2364,14 @@ enum sk_action { struct sk_msg_md { void *data; void *data_end; + + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; #define BPF_TAG_SIZE 8 @@ -1017,8 +2390,13 @@ struct bpf_prog_info { __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 gpl_compatible:1; __u64 netns_dev; __u64 netns_ino; + __u32 nr_jited_ksyms; + __u32 nr_jited_func_lens; + __aligned_u64 jited_ksyms; + __aligned_u64 jited_func_lens; } __attribute__((aligned(8))); struct bpf_map_info { @@ -1030,8 +2408,18 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; + __u32 btf_id; + __u32 btf_key_type_id; + __u32 btf_value_type_id; +} __attribute__((aligned(8))); + +struct bpf_btf_info { + __aligned_u64 btf; + __u32 btf_size; + __u32 id; } __attribute__((aligned(8))); /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed @@ -1052,6 +2440,12 @@ struct bpf_sock_addr { __u32 family; /* Allows 4-byte read, but no write */ __u32 type; /* Allows 4-byte read, but no write */ __u32 protocol; /* Allows 4-byte read, but no write */ + __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ + __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ }; /* User bpf_sock_ops struct to access socket values and specify request ops @@ -1212,4 +2606,64 @@ struct bpf_raw_tracepoint_args { __u64 args[0]; }; +/* DIRECT: Skip the FIB rules and go to FIB table associated with device + * OUTPUT: Do lookup from egress perspective; default is ingress + */ +#define BPF_FIB_LOOKUP_DIRECT BIT(0) +#define BPF_FIB_LOOKUP_OUTPUT BIT(1) + +struct bpf_fib_lookup { + /* input: network family for lookup (AF_INET, AF_INET6) + * output: network family of egress nexthop + */ + __u8 family; + + /* set if lookup is to consider L4 data - e.g., FIB rules */ + __u8 l4_protocol; + __be16 sport; + __be16 dport; + + /* total length of packet from network header - used for MTU check */ + __u16 tot_len; + __u32 ifindex; /* L3 device index for lookup */ + + union { + /* inputs to lookup */ + __u8 tos; /* AF_INET */ + __be32 flowinfo; /* AF_INET6, flow_label + priority */ + + /* output: metric of fib result (IPv4/IPv6 only) */ + __u32 rt_metric; + }; + + union { + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ + }; + + /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in + * network header. output: bpf_fib_lookup sets to gateway address + * if FIB lookup returns gateway route + */ + union { + __be32 ipv4_dst; + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ +}; + +enum bpf_task_fd_type { + BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_UPROBE, /* filename + offset */ + BPF_FD_TYPE_URETPROBE, /* filename + offset */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h new file mode 100644 index 000000000000..0b5ddbe135a4 --- /dev/null +++ b/tools/include/uapi/linux/btf.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2018 Facebook */ +#ifndef _UAPI__LINUX_BTF_H__ +#define _UAPI__LINUX_BTF_H__ + +#include <linux/types.h> + +#define BTF_MAGIC 0xeB9F +#define BTF_VERSION 1 + +struct btf_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 type_off; /* offset of type section */ + __u32 type_len; /* length of type section */ + __u32 str_off; /* offset of string section */ + __u32 str_len; /* length of string section */ +}; + +/* Max # of type identifier */ +#define BTF_MAX_TYPE 0x0000ffff +/* Max offset into the string section */ +#define BTF_MAX_NAME_OFFSET 0x0000ffff +/* Max # of struct/union/enum members or func args */ +#define BTF_MAX_VLEN 0xffff + +struct btf_type { + __u32 name_off; + /* "info" bits arrangement + * bits 0-15: vlen (e.g. # of struct's members) + * bits 16-23: unused + * bits 24-27: kind (e.g. int, ptr, array...etc) + * bits 28-31: unused + */ + __u32 info; + /* "size" is used by INT, ENUM, STRUCT and UNION. + * "size" tells the size of the type it is describing. + * + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is a type_id referring to another type. + */ + union { + __u32 size; + __u32 type; + }; +}; + +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) +#define BTF_INFO_VLEN(info) ((info) & 0xffff) + +#define BTF_KIND_UNKN 0 /* Unknown */ +#define BTF_KIND_INT 1 /* Integer */ +#define BTF_KIND_PTR 2 /* Pointer */ +#define BTF_KIND_ARRAY 3 /* Array */ +#define BTF_KIND_STRUCT 4 /* Struct */ +#define BTF_KIND_UNION 5 /* Union */ +#define BTF_KIND_ENUM 6 /* Enumeration */ +#define BTF_KIND_FWD 7 /* Forward */ +#define BTF_KIND_TYPEDEF 8 /* Typedef */ +#define BTF_KIND_VOLATILE 9 /* Volatile */ +#define BTF_KIND_CONST 10 /* Const */ +#define BTF_KIND_RESTRICT 11 /* Restrict */ +#define BTF_KIND_MAX 11 +#define NR_BTF_KINDS 12 + +/* For some specific BTF_KIND, "struct btf_type" is immediately + * followed by extra data. + */ + +/* BTF_KIND_INT is followed by a u32 and the following + * is the 32 bits arrangement: + */ +#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) +#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) +#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff) + +/* Attributes stored in the BTF_INT_ENCODING */ +#define BTF_INT_SIGNED (1 << 0) +#define BTF_INT_CHAR (1 << 1) +#define BTF_INT_BOOL (1 << 2) + +/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". + * The exact number of btf_enum is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum { + __u32 name_off; + __s32 val; +}; + +/* BTF_KIND_ARRAY is followed by one "struct btf_array" */ +struct btf_array { + __u32 type; + __u32 index_type; + __u32 nelems; +}; + +/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed + * by multiple "struct btf_member". The exact number + * of btf_member is stored in the vlen (of the info in + * "struct btf_type"). + */ +struct btf_member { + __u32 name_off; + __u32 type; + __u32 offset; /* offset in bits */ +}; + +#endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h new file mode 100644 index 000000000000..841573019ae1 --- /dev/null +++ b/tools/include/uapi/linux/erspan.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * ERSPAN Tunnel Metadata + * + * Copyright (c) 2018 VMware + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Userspace API for metadata mode ERSPAN tunnel + */ +#ifndef _UAPI_ERSPAN_H +#define _UAPI_ERSPAN_H + +#include <linux/types.h> /* For __beXX in userspace */ +#include <asm/byteorder.h> + +/* ERSPAN version 2 metadata header */ +struct erspan_md2 { + __be32 timestamp; + __be16 sgt; /* security group tag */ +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hwid_upper:2, + ft:5, + p:1; + __u8 o:1, + gra:2, + dir:1, + hwid:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 p:1, + ft:5, + hwid_upper:2; + __u8 hwid:4, + dir:1, + gra:2, + o:1; +#else +#error "Please fix <asm/byteorder.h>" +#endif +}; + +struct erspan_metadata { + int version; + union { + __be32 index; /* Version 1 (type II)*/ + struct erspan_md2 md2; /* Version 2 (type III) */ + } u; +}; + +#endif /* _UAPI_ERSPAN_H */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 68699f654118..cf01b6824244 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -333,6 +333,7 @@ enum { IFLA_BRPORT_BCAST_FLOOD, IFLA_BRPORT_GROUP_FWD_MASK, IFLA_BRPORT_NEIGH_SUPPRESS, + IFLA_BRPORT_ISOLATED, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -516,6 +517,7 @@ enum { IFLA_VXLAN_COLLECT_METADATA, IFLA_VXLAN_LABEL, IFLA_VXLAN_GPE, + IFLA_VXLAN_TTL_INHERIT, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b02c41e53d56..b6270a3b38e9 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -677,10 +677,10 @@ struct kvm_ioeventfd { }; #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) -#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_HLT | \ KVM_X86_DISABLE_EXITS_PAUSE) /* for KVM_ENABLE_CAP */ @@ -948,6 +948,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 +#define KVM_CAP_HYPERV_TLBFLUSH 155 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h new file mode 100644 index 000000000000..f189931042a7 --- /dev/null +++ b/tools/include/uapi/linux/lirc.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * lirc.h - linux infrared remote control header file + * last modified 2010/07/13 by Jarod Wilson + */ + +#ifndef _LINUX_LIRC_H +#define _LINUX_LIRC_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +#define PULSE_BIT 0x01000000 +#define PULSE_MASK 0x00FFFFFF + +#define LIRC_MODE2_SPACE 0x00000000 +#define LIRC_MODE2_PULSE 0x01000000 +#define LIRC_MODE2_FREQUENCY 0x02000000 +#define LIRC_MODE2_TIMEOUT 0x03000000 + +#define LIRC_VALUE_MASK 0x00FFFFFF +#define LIRC_MODE2_MASK 0xFF000000 + +#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) +#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) +#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) +#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) + +#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) +#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) + +#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE) +#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) +#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) +#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) + +/* used heavily by lirc userspace */ +#define lirc_t int + +/*** lirc compatible hardware features ***/ + +#define LIRC_MODE2SEND(x) (x) +#define LIRC_SEND2MODE(x) (x) +#define LIRC_MODE2REC(x) ((x) << 16) +#define LIRC_REC2MODE(x) ((x) >> 16) + +#define LIRC_MODE_RAW 0x00000001 +#define LIRC_MODE_PULSE 0x00000002 +#define LIRC_MODE_MODE2 0x00000004 +#define LIRC_MODE_SCANCODE 0x00000008 +#define LIRC_MODE_LIRCCODE 0x00000010 + + +#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) +#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) +#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) +#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) + +#define LIRC_CAN_SEND_MASK 0x0000003f + +#define LIRC_CAN_SET_SEND_CARRIER 0x00000100 +#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200 +#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400 + +#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW) +#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE) +#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2) +#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE) +#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE) + +#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) + +#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) +#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) + +#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 +#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 +#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 +#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 +#define LIRC_CAN_SET_REC_FILTER 0x08000000 + +#define LIRC_CAN_MEASURE_CARRIER 0x02000000 +#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 + +#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) +#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) + +#define LIRC_CAN_NOTIFY_DECODE 0x01000000 + +/*** IOCTL commands for lirc driver ***/ + +#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) + +#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32) +#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32) +#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32) + +#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32) +#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32) + +/* code length in bits, currently only for LIRC_MODE_LIRCCODE */ +#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32) + +#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32) +#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32) +/* Note: these can reset the according pulse_width */ +#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32) +#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32) +#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32) +#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32) + +/* + * when a timeout != 0 is set the driver will send a + * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is + * never sent, timeout is disabled by default + */ +#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32) + +/* 1 enables, 0 disables timeout reports in MODE2 */ +#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32) + +/* + * if enabled from the next key press on the driver will send + * LIRC_MODE2_FREQUENCY packets + */ +#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32) + +/* + * to set a range use LIRC_SET_REC_CARRIER_RANGE with the + * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound + */ +#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32) + +#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32) + +/* + * struct lirc_scancode - decoded scancode with protocol for use with + * LIRC_MODE_SCANCODE + * + * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR + * was decoded. + * @flags: should be 0 for transmit. When receiving scancodes, + * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set + * depending on the protocol + * @rc_proto: see enum rc_proto + * @keycode: the translated keycode. Set to 0 for transmit. + * @scancode: the scancode received or to be sent + */ +struct lirc_scancode { + __u64 timestamp; + __u16 flags; + __u16 rc_proto; + __u32 keycode; + __u64 scancode; +}; + +/* Set if the toggle bit of rc-5 or rc-6 is enabled */ +#define LIRC_SCANCODE_FLAG_TOGGLE 1 +/* Set if this is a nec or sanyo repeat */ +#define LIRC_SCANCODE_FLAG_REPEAT 2 + +/** + * enum rc_proto - the Remote Controller protocol + * + * @RC_PROTO_UNKNOWN: Protocol not known + * @RC_PROTO_OTHER: Protocol known but proprietary + * @RC_PROTO_RC5: Philips RC5 protocol + * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol + * @RC_PROTO_RC5_SZ: StreamZap variant of RC5 + * @RC_PROTO_JVC: JVC protocol + * @RC_PROTO_SONY12: Sony 12 bit protocol + * @RC_PROTO_SONY15: Sony 15 bit protocol + * @RC_PROTO_SONY20: Sony 20 bit protocol + * @RC_PROTO_NEC: NEC protocol + * @RC_PROTO_NECX: Extended NEC protocol + * @RC_PROTO_NEC32: NEC 32 bit protocol + * @RC_PROTO_SANYO: Sanyo protocol + * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard + * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse + * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol + * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol + * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol + * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol + * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol + * @RC_PROTO_SHARP: Sharp protocol + * @RC_PROTO_XMP: XMP protocol + * @RC_PROTO_CEC: CEC protocol + * @RC_PROTO_IMON: iMon Pad protocol + */ +enum rc_proto { + RC_PROTO_UNKNOWN = 0, + RC_PROTO_OTHER = 1, + RC_PROTO_RC5 = 2, + RC_PROTO_RC5X_20 = 3, + RC_PROTO_RC5_SZ = 4, + RC_PROTO_JVC = 5, + RC_PROTO_SONY12 = 6, + RC_PROTO_SONY15 = 7, + RC_PROTO_SONY20 = 8, + RC_PROTO_NEC = 9, + RC_PROTO_NECX = 10, + RC_PROTO_NEC32 = 11, + RC_PROTO_SANYO = 12, + RC_PROTO_MCIR2_KBD = 13, + RC_PROTO_MCIR2_MSE = 14, + RC_PROTO_RC6_0 = 15, + RC_PROTO_RC6_6A_20 = 16, + RC_PROTO_RC6_6A_24 = 17, + RC_PROTO_RC6_6A_32 = 18, + RC_PROTO_RC6_MCE = 19, + RC_PROTO_SHARP = 20, + RC_PROTO_XMP = 21, + RC_PROTO_CEC = 22, + RC_PROTO_IMON = 23, +}; + +#endif diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index af5f8c2df87a..c0d7ea0bf5b6 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -170,7 +170,7 @@ struct prctl_mm_map { * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. * - * See Documentation/prctl/no_new_privs.txt for more details. + * See Documentation/userspace-api/no_new_privs.rst for more details. */ #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 @@ -207,4 +207,16 @@ struct prctl_mm_map { # define PR_SVE_VL_LEN_MASK 0xffff # define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h new file mode 100644 index 000000000000..286e8d6a8e98 --- /dev/null +++ b/tools/include/uapi/linux/seg6.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_H +#define _UAPI_LINUX_SEG6_H + +#include <linux/types.h> +#include <linux/in6.h> /* For struct in6_addr. */ + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; /* Represents the last_entry field of SRH */ + __u8 flags; + __u16 tag; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif diff --git a/tools/include/uapi/linux/seg6_local.h b/tools/include/uapi/linux/seg6_local.h new file mode 100644 index 000000000000..edc138bdc56d --- /dev/null +++ b/tools/include/uapi/linux/seg6_local.h @@ -0,0 +1,80 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_LOCAL_H +#define _UAPI_LINUX_SEG6_LOCAL_H + +#include <linux/seg6.h> + +enum { + SEG6_LOCAL_UNSPEC, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_SRH, + SEG6_LOCAL_TABLE, + SEG6_LOCAL_NH4, + SEG6_LOCAL_NH6, + SEG6_LOCAL_IIF, + SEG6_LOCAL_OIF, + SEG6_LOCAL_BPF, + __SEG6_LOCAL_MAX, +}; +#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) + +enum { + SEG6_LOCAL_ACTION_UNSPEC = 0, + /* node segment */ + SEG6_LOCAL_ACTION_END = 1, + /* adjacency segment (IPv6 cross-connect) */ + SEG6_LOCAL_ACTION_END_X = 2, + /* lookup of next seg NH in table */ + SEG6_LOCAL_ACTION_END_T = 3, + /* decap and L2 cross-connect */ + SEG6_LOCAL_ACTION_END_DX2 = 4, + /* decap and IPv6 cross-connect */ + SEG6_LOCAL_ACTION_END_DX6 = 5, + /* decap and IPv4 cross-connect */ + SEG6_LOCAL_ACTION_END_DX4 = 6, + /* decap and lookup of DA in v6 table */ + SEG6_LOCAL_ACTION_END_DT6 = 7, + /* decap and lookup of DA in v4 table */ + SEG6_LOCAL_ACTION_END_DT4 = 8, + /* binding segment with insertion */ + SEG6_LOCAL_ACTION_END_B6 = 9, + /* binding segment with encapsulation */ + SEG6_LOCAL_ACTION_END_B6_ENCAP = 10, + /* binding segment with MPLS encap */ + SEG6_LOCAL_ACTION_END_BM = 11, + /* lookup last seg in table */ + SEG6_LOCAL_ACTION_END_S = 12, + /* forward to SR-unaware VNF with static proxy */ + SEG6_LOCAL_ACTION_END_AS = 13, + /* forward to SR-unaware VNF with masquerading */ + SEG6_LOCAL_ACTION_END_AM = 14, + /* custom BPF action */ + SEG6_LOCAL_ACTION_END_BPF = 15, + + __SEG6_LOCAL_ACTION_MAX, +}; + +#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1) + +enum { + SEG6_LOCAL_BPF_PROG_UNSPEC, + SEG6_LOCAL_BPF_PROG, + SEG6_LOCAL_BPF_PROG_NAME, + __SEG6_LOCAL_BPF_PROG_MAX, +}; + +#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1) + +#endif diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 6a12bbf39f7b..7aba8243a0e7 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -201,7 +201,7 @@ static void mem_toupper(char *f, size_t len) /* * Check for "NAME_PATH" environment variable to override fs location (for - * testing). This matches the recommendation in Documentation/sysfs-rules.txt + * testing). This matches the recommendation in Documentation/admin-guide/sysfs-rules.rst * for SYSFS_PATH. */ static bool fs__env_override(struct fs *fs) diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 7b7fd0b18551..120037496f77 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -13,11 +13,9 @@ #include "tracing_path.h" - -char tracing_mnt[PATH_MAX] = "/sys/kernel/debug"; -char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing"; -char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events"; - +static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug"; +static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing"; +static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events"; static void __tracing_path_set(const char *tracing, const char *mountpoint) { @@ -76,7 +74,7 @@ char *get_tracing_file(const char *name) { char *file; - if (asprintf(&file, "%s/%s", tracing_path, name) < 0) + if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0) return NULL; return file; @@ -87,6 +85,34 @@ void put_tracing_file(char *file) free(file); } +char *get_events_file(const char *name) +{ + char *file; + + if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0) + return NULL; + + return file; +} + +void put_events_file(char *file) +{ + free(file); +} + +DIR *tracing_events__opendir(void) +{ + DIR *dir = NULL; + char *path = get_tracing_file("events"); + + if (path) { + dir = opendir(path); + put_events_file(path); + } + + return dir; +} + int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) { @@ -129,7 +155,7 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size, snprintf(buf, size, "Error:\tNo permissions to read %s/%s\n" "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - tracing_events_path, filename, tracing_mnt); + tracing_events_path, filename, tracing_path_mount()); } break; default: diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h index 0066f06cc381..a19136b086dc 100644 --- a/tools/lib/api/fs/tracing_path.h +++ b/tools/lib/api/fs/tracing_path.h @@ -3,9 +3,9 @@ #define __API_FS_TRACING_PATH_H #include <linux/types.h> +#include <dirent.h> -extern char tracing_path[]; -extern char tracing_events_path[]; +DIR *tracing_events__opendir(void); void tracing_path_set(const char *mountpoint); const char *tracing_path_mount(void); @@ -13,5 +13,10 @@ const char *tracing_path_mount(void); char *get_tracing_file(const char *name); void put_tracing_file(char *file); +char *get_events_file(const char *name); +void put_events_file(char *file); + +#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; }) + int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); #endif /* __API_FS_TRACING_PATH_H */ diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 64c679d67109..6070e655042d 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e6d5f8d1477f..5390e7725e43 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -69,7 +69,7 @@ FEATURE_USER = .libbpf FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf FEATURE_DISPLAY = libelf bpf -INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) check_feat := 1 @@ -189,6 +189,7 @@ install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); + $(call do_install,btf.h,$(prefix)/include/bpf,644); install: install_lib diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index acbb3f8b3bec..9ddc89dae962 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -73,43 +73,77 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node) +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { - __u32 name_len = name ? strlen(name) : 0; + __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + attr.map_type = create_attr->map_type; + attr.key_size = create_attr->key_size; + attr.value_size = create_attr->value_size; + attr.max_entries = create_attr->max_entries; + attr.map_flags = create_attr->map_flags; + memcpy(attr.map_name, create_attr->name, + min(name_len, BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = create_attr->numa_node; + attr.btf_fd = create_attr->btf_fd; + attr.btf_key_type_id = create_attr->btf_key_type_id; + attr.btf_value_type_id = create_attr->btf_value_type_id; + attr.map_ifindex = create_attr->map_ifindex; + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; if (node >= 0) { - attr.map_flags |= BPF_F_NUMA_NODE; - attr.numa_node = node; + map_attr.numa_node = node; + map_attr.map_flags |= BPF_F_NUMA_NODE; } - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + return bpf_create_map_xattr(&map_attr); } int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, NULL, key_size, value_size, - max_entries, map_flags, -1); + struct bpf_create_map_attr map_attr = {}; + + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); } int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, name, key_size, value_size, - max_entries, map_flags, -1); + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); } int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, @@ -168,6 +202,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_size = 0; attr.log_level = 0; attr.kern_version = load_attr->kern_version; + attr.prog_ifindex = load_attr->prog_ifindex; memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); @@ -425,6 +460,16 @@ int bpf_map_get_fd_by_id(__u32 id) return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); } +int bpf_btf_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.btf_id = id; + + return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); +} + int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) { union bpf_attr attr; @@ -573,3 +618,51 @@ cleanup: close(sock); return ret; } + +int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log) +{ + union bpf_attr attr = {}; + int fd; + + attr.btf = ptr_to_u64(btf); + attr.btf_size = btf_size; + +retry: + if (do_log && log_buf && log_buf_size) { + attr.btf_log_level = 1; + attr.btf_log_size = log_buf_size; + attr.btf_log_buf = ptr_to_u64(log_buf); + } + + fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + if (fd == -1 && !do_log && log_buf && log_buf_size) { + do_log = true; + goto retry; + } + + return fd; +} + +int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, + __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, + __u64 *probe_addr) +{ + union bpf_attr attr = {}; + int err; + + attr.task_fd_query.pid = pid; + attr.task_fd_query.fd = fd; + attr.task_fd_query.flags = flags; + attr.task_fd_query.buf = ptr_to_u64(buf); + attr.task_fd_query.buf_len = *buf_len; + + err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + *buf_len = attr.task_fd_query.buf_len; + *prog_id = attr.task_fd_query.prog_id; + *fd_type = attr.task_fd_query.fd_type; + *probe_offset = attr.task_fd_query.probe_offset; + *probe_addr = attr.task_fd_query.probe_addr; + + return err; +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 39f6a0d64a3b..0639a30a457d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,8 +24,24 @@ #define __BPF_BPF_H #include <linux/bpf.h> +#include <stdbool.h> #include <stddef.h> +struct bpf_create_map_attr { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 numa_node; + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 map_ifindex; +}; + +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node); @@ -49,6 +65,7 @@ struct bpf_load_program_attr { size_t insns_cnt; const char *license; __u32 kern_version; + __u32 prog_ifindex; }; /* Recommend log buffer size */ @@ -83,8 +100,14 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); int bpf_prog_get_fd_by_id(__u32 id); int bpf_map_get_fd_by_id(__u32 id); +int bpf_btf_get_fd_by_id(__u32 id); int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); int bpf_raw_tracepoint_open(const char *name, int prog_fd); +int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log); +int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, + __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, + __u64 *probe_addr); #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c new file mode 100644 index 000000000000..8c54a4b6f187 --- /dev/null +++ b/tools/lib/bpf/btf.c @@ -0,0 +1,373 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <linux/err.h> +#include <linux/btf.h> +#include "btf.h" +#include "bpf.h" + +#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); } +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) + +#define BTF_MAX_NR_TYPES 65535 + +static struct btf_type btf_void; + +struct btf { + union { + struct btf_header *hdr; + void *data; + }; + struct btf_type **types; + const char *strings; + void *nohdr_data; + uint32_t nr_types; + uint32_t types_size; + uint32_t data_size; + int fd; +}; + +static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset) +{ + if (offset < btf->hdr->str_len) + return &btf->strings[offset]; + else + return NULL; +} + +static int btf_add_type(struct btf *btf, struct btf_type *t) +{ + if (btf->types_size - btf->nr_types < 2) { + struct btf_type **new_types; + u32 expand_by, new_size; + + if (btf->types_size == BTF_MAX_NR_TYPES) + return -E2BIG; + + expand_by = max(btf->types_size >> 2, 16); + new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); + + new_types = realloc(btf->types, sizeof(*new_types) * new_size); + if (!new_types) + return -ENOMEM; + + if (btf->nr_types == 0) + new_types[0] = &btf_void; + + btf->types = new_types; + btf->types_size = new_size; + } + + btf->types[++(btf->nr_types)] = t; + + return 0; +} + +static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) +{ + const struct btf_header *hdr = btf->hdr; + u32 meta_left; + + if (btf->data_size < sizeof(struct btf_header)) { + elog("BTF header not found\n"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + elog("Invalid BTF magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + elog("Unsupported BTF version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + elog("Unsupported BTF flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + meta_left = btf->data_size - sizeof(*hdr); + if (!meta_left) { + elog("BTF has no data\n"); + return -EINVAL; + } + + if (meta_left < hdr->type_off) { + elog("Invalid BTF type section offset:%u\n", hdr->type_off); + return -EINVAL; + } + + if (meta_left < hdr->str_off) { + elog("Invalid BTF string section offset:%u\n", hdr->str_off); + return -EINVAL; + } + + if (hdr->type_off >= hdr->str_off) { + elog("BTF type section offset >= string section offset. No type?\n"); + return -EINVAL; + } + + if (hdr->type_off & 0x02) { + elog("BTF type section is not aligned to 4 bytes\n"); + return -EINVAL; + } + + btf->nohdr_data = btf->hdr + 1; + + return 0; +} + +static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) +{ + const struct btf_header *hdr = btf->hdr; + const char *start = btf->nohdr_data + hdr->str_off; + const char *end = start + btf->hdr->str_len; + + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + start[0] || end[-1]) { + elog("Invalid BTF string section\n"); + return -EINVAL; + } + + btf->strings = start; + + return 0; +} + +static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) +{ + struct btf_header *hdr = btf->hdr; + void *nohdr_data = btf->nohdr_data; + void *next_type = nohdr_data + hdr->type_off; + void *end_type = nohdr_data + hdr->str_off; + + while (next_type < end_type) { + struct btf_type *t = next_type; + uint16_t vlen = BTF_INFO_VLEN(t->info); + int err; + + next_type += sizeof(*t); + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + next_type += sizeof(int); + break; + case BTF_KIND_ARRAY: + next_type += sizeof(struct btf_array); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + next_type += vlen * sizeof(struct btf_member); + break; + case BTF_KIND_ENUM: + next_type += vlen * sizeof(struct btf_enum); + break; + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_FWD: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + break; + default: + elog("Unsupported BTF_KIND:%u\n", + BTF_INFO_KIND(t->info)); + return -EINVAL; + } + + err = btf_add_type(btf, t); + if (err) + return err; + } + + return 0; +} + +static const struct btf_type *btf_type_by_id(const struct btf *btf, + uint32_t type_id) +{ + if (type_id > btf->nr_types) + return NULL; + + return btf->types[type_id]; +} + +static bool btf_type_is_void(const struct btf_type *t) +{ + return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD; +} + +static bool btf_type_is_void_or_null(const struct btf_type *t) +{ + return !t || btf_type_is_void(t); +} + +static int64_t btf_type_size(const struct btf_type *t) +{ + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + return t->size; + case BTF_KIND_PTR: + return sizeof(void *); + default: + return -EINVAL; + } +} + +#define MAX_RESOLVE_DEPTH 32 + +int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id) +{ + const struct btf_array *array; + const struct btf_type *t; + uint32_t nelems = 1; + int64_t size = -1; + int i; + + t = btf_type_by_id(btf, type_id); + for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); + i++) { + size = btf_type_size(t); + if (size >= 0) + break; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + type_id = t->type; + break; + case BTF_KIND_ARRAY: + array = (const struct btf_array *)(t + 1); + if (nelems && array->nelems > UINT32_MAX / nelems) + return -E2BIG; + nelems *= array->nelems; + type_id = array->type; + break; + default: + return -EINVAL; + } + + t = btf_type_by_id(btf, type_id); + } + + if (size < 0) + return -EINVAL; + + if (nelems && size > UINT32_MAX / nelems) + return -E2BIG; + + return nelems * size; +} + +int32_t btf__find_by_name(const struct btf *btf, const char *type_name) +{ + uint32_t i; + + if (!strcmp(type_name, "void")) + return 0; + + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t = btf->types[i]; + const char *name = btf_name_by_offset(btf, t->name_off); + + if (name && !strcmp(type_name, name)) + return i; + } + + return -ENOENT; +} + +void btf__free(struct btf *btf) +{ + if (!btf) + return; + + if (btf->fd != -1) + close(btf->fd); + + free(btf->data); + free(btf->types); + free(btf); +} + +struct btf *btf__new(uint8_t *data, uint32_t size, + btf_print_fn_t err_log) +{ + uint32_t log_buf_size = 0; + char *log_buf = NULL; + struct btf *btf; + int err; + + btf = calloc(1, sizeof(struct btf)); + if (!btf) + return ERR_PTR(-ENOMEM); + + btf->fd = -1; + + if (err_log) { + log_buf = malloc(BPF_LOG_BUF_SIZE); + if (!log_buf) { + err = -ENOMEM; + goto done; + } + *log_buf = 0; + log_buf_size = BPF_LOG_BUF_SIZE; + } + + btf->data = malloc(size); + if (!btf->data) { + err = -ENOMEM; + goto done; + } + + memcpy(btf->data, data, size); + btf->data_size = size; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + + if (btf->fd == -1) { + err = -errno; + elog("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (log_buf && *log_buf) + elog("%s\n", log_buf); + goto done; + } + + err = btf_parse_hdr(btf, err_log); + if (err) + goto done; + + err = btf_parse_str_sec(btf, err_log); + if (err) + goto done; + + err = btf_parse_type_sec(btf, err_log); + +done: + free(log_buf); + + if (err) { + btf__free(btf); + return ERR_PTR(err); + } + + return btf; +} + +int btf__fd(const struct btf *btf) +{ + return btf->fd; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h new file mode 100644 index 000000000000..74bb344035bb --- /dev/null +++ b/tools/lib/bpf/btf.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#ifndef __BPF_BTF_H +#define __BPF_BTF_H + +#include <stdint.h> + +#define BTF_ELF_SEC ".BTF" + +struct btf; + +typedef int (*btf_print_fn_t)(const char *, ...) + __attribute__((format(printf, 1, 2))); + +void btf__free(struct btf *btf); +struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log); +int32_t btf__find_by_name(const struct btf *btf, const char *type_name); +int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id); +int btf__fd(const struct btf *btf); + +#endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0f9f06df49bc..a1e96b5de5ff 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -31,6 +31,7 @@ #include <unistd.h> #include <fcntl.h> #include <errno.h> +#include <perf-sys.h> #include <asm/unistd.h> #include <linux/err.h> #include <linux/kernel.h> @@ -45,6 +46,7 @@ #include "libbpf.h" #include "bpf.h" +#include "btf.h" #ifndef EM_BPF #define EM_BPF 247 @@ -176,6 +178,7 @@ struct bpf_program { /* Index in elf obj file, for relocation use. */ int idx; char *name; + int prog_ifindex; char *section_name; struct bpf_insn *insns; size_t insns_cnt, main_prog_cnt; @@ -211,7 +214,10 @@ struct bpf_map { int fd; char *name; size_t offset; + int map_ifindex; struct bpf_map_def def; + uint32_t btf_key_type_id; + uint32_t btf_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; }; @@ -256,6 +262,8 @@ struct bpf_object { */ struct list_head list; + struct btf *btf; + void *priv; bpf_object_clear_priv_t clear_priv; @@ -819,7 +827,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj) data->d_size); else if (strcmp(name, "maps") == 0) obj->efile.maps_shndx = idx; - else if (sh.sh_type == SHT_SYMTAB) { + else if (strcmp(name, BTF_ELF_SEC) == 0) { + obj->btf = btf__new(data->d_buf, data->d_size, + __pr_debug); + if (IS_ERR(obj->btf)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_ELF_SEC, PTR_ERR(obj->btf)); + obj->btf = NULL; + } + } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", obj->path); @@ -996,33 +1012,127 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return 0; } +static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) +{ + struct bpf_map_def *def = &map->def; + const size_t max_name = 256; + int64_t key_size, value_size; + int32_t key_id, value_id; + char name[max_name]; + + /* Find key type by name from BTF */ + if (snprintf(name, max_name, "%s_key", map->name) == max_name) { + pr_warning("map:%s length of BTF key_type:%s_key is too long\n", + map->name, map->name); + return -EINVAL; + } + + key_id = btf__find_by_name(btf, name); + if (key_id < 0) { + pr_debug("map:%s key_type:%s cannot be found in BTF\n", + map->name, name); + return key_id; + } + + key_size = btf__resolve_size(btf, key_id); + if (key_size < 0) { + pr_warning("map:%s key_type:%s cannot get the BTF type_size\n", + map->name, name); + return key_size; + } + + if (def->key_size != key_size) { + pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n", + map->name, name, (unsigned int)key_size, def->key_size); + return -EINVAL; + } + + /* Find value type from BTF */ + if (snprintf(name, max_name, "%s_value", map->name) == max_name) { + pr_warning("map:%s length of BTF value_type:%s_value is too long\n", + map->name, map->name); + return -EINVAL; + } + + value_id = btf__find_by_name(btf, name); + if (value_id < 0) { + pr_debug("map:%s value_type:%s cannot be found in BTF\n", + map->name, name); + return value_id; + } + + value_size = btf__resolve_size(btf, value_id); + if (value_size < 0) { + pr_warning("map:%s value_type:%s cannot get the BTF type_size\n", + map->name, name); + return value_size; + } + + if (def->value_size != value_size) { + pr_warning("map:%s value_type:%s has BTF type_size:%u != value_size:%u\n", + map->name, name, (unsigned int)value_size, def->value_size); + return -EINVAL; + } + + map->btf_key_type_id = key_id; + map->btf_value_type_id = value_id; + + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { + struct bpf_create_map_attr create_attr = {}; unsigned int i; + int err; for (i = 0; i < obj->nr_maps; i++) { - struct bpf_map_def *def = &obj->maps[i].def; - int *pfd = &obj->maps[i].fd; - - *pfd = bpf_create_map_name(def->type, - obj->maps[i].name, - def->key_size, - def->value_size, - def->max_entries, - def->map_flags); + struct bpf_map *map = &obj->maps[i]; + struct bpf_map_def *def = &map->def; + int *pfd = &map->fd; + + create_attr.name = map->name; + create_attr.map_ifindex = map->map_ifindex; + create_attr.map_type = def->type; + create_attr.map_flags = def->map_flags; + create_attr.key_size = def->key_size; + create_attr.value_size = def->value_size; + create_attr.max_entries = def->max_entries; + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + + if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { + create_attr.btf_fd = btf__fd(obj->btf); + create_attr.btf_key_type_id = map->btf_key_type_id; + create_attr.btf_value_type_id = map->btf_value_type_id; + } + + *pfd = bpf_create_map_xattr(&create_attr); + if (*pfd < 0 && create_attr.btf_key_type_id) { + pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", + map->name, strerror(errno), errno); + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; + *pfd = bpf_create_map_xattr(&create_attr); + } + if (*pfd < 0) { size_t j; - int err = *pfd; + err = *pfd; pr_warning("failed to create map (name: '%s'): %s\n", - obj->maps[i].name, + map->name, strerror(errno)); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; } - pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd); + pr_debug("create map %s: fd=%d\n", map->name, *pfd); } return 0; @@ -1166,7 +1276,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) static int load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, const char *name, struct bpf_insn *insns, int insns_cnt, - char *license, u32 kern_version, int *pfd) + char *license, u32 kern_version, int *pfd, int prog_ifindex) { struct bpf_load_program_attr load_attr; char *log_buf; @@ -1180,6 +1290,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, load_attr.insns_cnt = insns_cnt; load_attr.license = license; load_attr.kern_version = kern_version; + load_attr.prog_ifindex = prog_ifindex; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1261,7 +1372,8 @@ bpf_program__load(struct bpf_program *prog, } err = load_program(prog->type, prog->expected_attach_type, prog->name, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + license, kern_version, &fd, + prog->prog_ifindex); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1292,7 +1404,8 @@ bpf_program__load(struct bpf_program *prog, err = load_program(prog->type, prog->expected_attach_type, prog->name, result.new_insn_ptr, result.new_insn_cnt, - license, kern_version, &fd); + license, kern_version, &fd, + prog->prog_ifindex); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", @@ -1331,9 +1444,39 @@ bpf_object__load_progs(struct bpf_object *obj) return 0; } -static int bpf_object__validate(struct bpf_object *obj) +static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_XDP: + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_SK_SKB: + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + case BPF_PROG_TYPE_LIRC_MODE2: + return false; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + default: + return true; + } +} + +static int bpf_object__validate(struct bpf_object *obj, bool needs_kver) { - if (obj->kern_version == 0) { + if (needs_kver && obj->kern_version == 0) { pr_warning("%s doesn't provide kernel version\n", obj->path); return -LIBBPF_ERRNO__KVERSION; @@ -1342,7 +1485,8 @@ static int bpf_object__validate(struct bpf_object *obj) } static struct bpf_object * -__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz) +__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, + bool needs_kver) { struct bpf_object *obj; int err; @@ -1360,7 +1504,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz) CHECK_ERR(bpf_object__check_endianness(obj), err, out); CHECK_ERR(bpf_object__elf_collect(obj), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); - CHECK_ERR(bpf_object__validate(obj), err, out); + CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); bpf_object__elf_finish(obj); return obj; @@ -1377,7 +1521,7 @@ struct bpf_object *bpf_object__open(const char *path) pr_debug("loading %s\n", path); - return __bpf_object__open(path, NULL, 0); + return __bpf_object__open(path, NULL, 0, true); } struct bpf_object *bpf_object__open_buffer(void *obj_buf, @@ -1400,7 +1544,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf, pr_debug("loading object '%s' from buffer\n", name); - return __bpf_object__open(name, obj_buf, obj_buf_sz); + return __bpf_object__open(name, obj_buf, obj_buf_sz, true); } int bpf_object__unload(struct bpf_object *obj) @@ -1641,6 +1785,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object__unload(obj); + btf__free(obj->btf); for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); @@ -1692,6 +1837,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj) return obj ? obj->kern_version : 0; } +int bpf_object__btf_fd(const struct bpf_object *obj) +{ + return obj->btf ? btf__fd(obj->btf) : -1; +} + int bpf_object__set_priv(struct bpf_object *obj, void *priv, bpf_object_clear_priv_t clear_priv) { @@ -1845,11 +1995,12 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); +BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); -static void bpf_program__set_expected_attach_type(struct bpf_program *prog, - enum bpf_attach_type type) +void bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type) { prog->expected_attach_type = type; } @@ -1859,6 +2010,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog, #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0) +#define BPF_S_PROG_SEC(string, ptype) \ + BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype) + #define BPF_SA_PROG_SEC(string, ptype) \ BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype) @@ -1874,6 +2028,7 @@ static const struct { BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), @@ -1889,10 +2044,15 @@ static const struct { BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND), BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT), BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT), + BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG), + BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG), + BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND), + BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND), }; #undef BPF_PROG_SEC #undef BPF_PROG_SEC_FULL +#undef BPF_S_PROG_SEC #undef BPF_SA_PROG_SEC static int bpf_program__identify_section(struct bpf_program *prog) @@ -1929,6 +2089,16 @@ const char *bpf_map__name(struct bpf_map *map) return map ? map->name : NULL; } +uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map) +{ + return map ? map->btf_key_type_id : 0; +} + +uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map) +{ + return map ? map->btf_value_type_id : 0; +} + int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv) { @@ -2028,13 +2198,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, enum bpf_attach_type expected_attach_type; enum bpf_prog_type prog_type; struct bpf_object *obj; + struct bpf_map *map; int section_idx; int err; if (!attr) return -EINVAL; + if (!attr->file) + return -EINVAL; - obj = bpf_object__open(attr->file); + obj = __bpf_object__open(attr->file, NULL, 0, + bpf_prog_type__needs_kver(attr->prog_type)); if (IS_ERR_OR_NULL(obj)) return -ENOENT; @@ -2044,6 +2218,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, * section name. */ prog_type = attr->prog_type; + prog->prog_ifindex = attr->ifindex; expected_attach_type = attr->expected_attach_type; if (prog_type == BPF_PROG_TYPE_UNSPEC) { section_idx = bpf_program__identify_section(prog); @@ -2064,6 +2239,10 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, first_prog = prog; } + bpf_map__for_each(map, obj) { + map->map_ifindex = attr->ifindex; + } + if (!first_prog) { pr_warning("object file doesn't contain bpf program\n"); bpf_object__close(obj); @@ -2080,3 +2259,63 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, *prog_fd = bpf_program__fd(first_prog); return 0; } + +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mem, unsigned long size, + unsigned long page_size, void **buf, size_t *buf_len, + bpf_perf_event_print_t fn, void *priv) +{ + volatile struct perf_event_mmap_page *header = mem; + __u64 data_tail = header->data_tail; + __u64 data_head = header->data_head; + void *base, *begin, *end; + int ret; + + asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ + if (data_head == data_tail) + return LIBBPF_PERF_EVENT_CONT; + + base = ((char *)header) + page_size; + + begin = base + data_tail % size; + end = base + data_head % size; + + while (begin != end) { + struct perf_event_header *ehdr; + + ehdr = begin; + if (begin + ehdr->size > base + size) { + long len = base + size - begin; + + if (*buf_len < ehdr->size) { + free(*buf); + *buf = malloc(ehdr->size); + if (!*buf) { + ret = LIBBPF_PERF_EVENT_ERROR; + break; + } + *buf_len = ehdr->size; + } + + memcpy(*buf, begin, len); + memcpy(*buf + len, base, ehdr->size - len); + ehdr = (void *)*buf; + begin = base + ehdr->size - len; + } else if (begin + ehdr->size == base + size) { + begin = base; + } else { + begin += ehdr->size; + } + + ret = fn(ehdr, priv); + if (ret != LIBBPF_PERF_EVENT_CONT) + break; + + data_tail += ehdr->size; + } + + __sync_synchronize(); /* smp_mb() */ + header->data_tail = data_tail; + + return ret; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index a3a62a583f27..09976531aa74 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -52,8 +52,8 @@ enum libbpf_errno { int libbpf_strerror(int err, char *buf, size_t size); /* - * In include/linux/compiler-gcc.h, __printf is defined. However - * it should be better if libbpf.h doesn't depend on Linux header file. + * __printf is defined in include/linux/compiler-gcc.h. However, + * it would be better if libbpf.h didn't depend on Linux header files. * So instead of __printf, here we use gcc attribute directly. */ typedef int (*libbpf_print_fn_t)(const char *, ...) @@ -78,6 +78,7 @@ int bpf_object__load(struct bpf_object *obj); int bpf_object__unload(struct bpf_object *obj); const char *bpf_object__name(struct bpf_object *obj); unsigned int bpf_object__kversion(struct bpf_object *obj); +int bpf_object__btf_fd(const struct bpf_object *obj); struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ @@ -91,7 +92,7 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv, bpf_object_clear_priv_t clear_priv); void *bpf_object__priv(struct bpf_object *prog); -/* Accessors of bpf_program. */ +/* Accessors of bpf_program */ struct bpf_program; struct bpf_program *bpf_program__next(struct bpf_program *prog, struct bpf_object *obj); @@ -120,28 +121,28 @@ struct bpf_insn; /* * Libbpf allows callers to adjust BPF programs before being loaded - * into kernel. One program in an object file can be transform into - * multiple variants to be attached to different code. + * into kernel. One program in an object file can be transformed into + * multiple variants to be attached to different hooks. * * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd - * are APIs for this propose. + * form an API for this purpose. * * - bpf_program_prep_t: - * It defines 'preprocessor', which is a caller defined function + * Defines a 'preprocessor', which is a caller defined function * passed to libbpf through bpf_program__set_prep(), and will be * called before program is loaded. The processor should adjust - * the program one time for each instances according to the number + * the program one time for each instance according to the instance id * passed to it. * * - bpf_program__set_prep: - * Attachs a preprocessor to a BPF program. The number of instances - * whould be created is also passed through this function. + * Attaches a preprocessor to a BPF program. The number of instances + * that should be created is also passed through this function. * * - bpf_program__nth_fd: - * After the program is loaded, get resuling fds from bpf program for - * each instances. + * After the program is loaded, get resulting FD of a given instance + * of the BPF program. * - * If bpf_program__set_prep() is not used, the program whould be loaded + * If bpf_program__set_prep() is not used, the program would be loaded * without adjustment during bpf_object__load(). The program has only * one instance. In this case bpf_program__fd(prog) is equal to * bpf_program__nth_fd(prog, 0). @@ -155,7 +156,7 @@ struct bpf_prog_prep_result { struct bpf_insn *new_insn_ptr; int new_insn_cnt; - /* If not NULL, result fd is set to it */ + /* If not NULL, result FD is written to it. */ int *pfd; }; @@ -168,8 +169,8 @@ struct bpf_prog_prep_result { * - res: Output parameter, result of transformation. * * Return value: - * - Zero: pre-processing success. - * - Non-zero: pre-processing, stop loading. + * - Zero: pre-processing success. + * - Non-zero: pre-processing error, stop loading. */ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, struct bpf_insn *insns, int insns_cnt, @@ -181,19 +182,23 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, int bpf_program__nth_fd(struct bpf_program *prog, int n); /* - * Adjust type of bpf program. Default is kprobe. + * Adjust type of BPF program. Default is kprobe. */ int bpf_program__set_socket_filter(struct bpf_program *prog); int bpf_program__set_tracepoint(struct bpf_program *prog); +int bpf_program__set_raw_tracepoint(struct bpf_program *prog); int bpf_program__set_kprobe(struct bpf_program *prog); int bpf_program__set_sched_cls(struct bpf_program *prog); int bpf_program__set_sched_act(struct bpf_program *prog); int bpf_program__set_xdp(struct bpf_program *prog); int bpf_program__set_perf_event(struct bpf_program *prog); void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); +void bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type); bool bpf_program__is_socket_filter(struct bpf_program *prog); bool bpf_program__is_tracepoint(struct bpf_program *prog); +bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); bool bpf_program__is_kprobe(struct bpf_program *prog); bool bpf_program__is_sched_cls(struct bpf_program *prog); bool bpf_program__is_sched_act(struct bpf_program *prog); @@ -201,10 +206,10 @@ bool bpf_program__is_xdp(struct bpf_program *prog); bool bpf_program__is_perf_event(struct bpf_program *prog); /* - * We don't need __attribute__((packed)) now since it is - * unnecessary for 'bpf_map_def' because they are all aligned. - * In addition, using it will trigger -Wpacked warning message, - * and will be treated as an error due to -Werror. + * No need for __attribute__((packed)), all members of 'bpf_map_def' + * are all aligned. In addition, using __attribute__((packed)) + * would trigger a -Wpacked warning message, and lead to an error + * if -Werror is set. */ struct bpf_map_def { unsigned int type; @@ -215,8 +220,8 @@ struct bpf_map_def { }; /* - * There is another 'struct bpf_map' in include/linux/map.h. However, - * it is not a uapi header so no need to consider name clash. + * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, + * so no need to worry about a name clash. */ struct bpf_map; struct bpf_map * @@ -224,7 +229,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); /* * Get bpf_map through the offset of corresponding struct bpf_map_def - * in the bpf object file. + * in the BPF object file. */ struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); @@ -239,6 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); int bpf_map__fd(struct bpf_map *map); const struct bpf_map_def *bpf_map__def(struct bpf_map *map); const char *bpf_map__name(struct bpf_map *map); +uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map); +uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); int bpf_map__set_priv(struct bpf_map *map, void *priv, @@ -252,6 +259,7 @@ struct bpf_prog_load_attr { const char *file; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; + int ifindex; }; int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, @@ -260,4 +268,17 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); + +enum bpf_perf_event_ret { + LIBBPF_PERF_EVENT_DONE = 0, + LIBBPF_PERF_EVENT_ERROR = -1, + LIBBPF_PERF_EVENT_CONT = -2, +}; + +typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, + void *priv); +int bpf_perf_event_read_simple(void *mem, unsigned long size, + unsigned long page_size, + void **buf, size_t *buf_len, + bpf_perf_event_print_t fn, void *priv); #endif diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c index 689b6a130dd7..96d830545bbb 100644 --- a/tools/lib/symbol/kallsyms.c +++ b/tools/lib/symbol/kallsyms.c @@ -10,6 +10,12 @@ u8 kallsyms2elf_type(char type) return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT; } +bool kallsyms__is_function(char symbol_type) +{ + symbol_type = toupper(symbol_type); + return symbol_type == 'T' || symbol_type == 'W'; +} + int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start)) diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index bc40101d72c1..72ab9870454b 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -20,6 +20,8 @@ static inline u8 kallsyms2elf_binding(char type) u8 kallsyms2elf_type(char type); +bool kallsyms__is_function(char symbol_type); + int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start)); diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt index 956b1ae4aafb..33ba98d72b16 100644 --- a/tools/memory-model/Documentation/cheatsheet.txt +++ b/tools/memory-model/Documentation/cheatsheet.txt @@ -1,6 +1,6 @@ Prior Operation Subsequent Operation --------------- --------------------------- - C Self R W RWM Self R W DR DW RMW SV + C Self R W RMW Self R W DR DW RMW SV -- ---- - - --- ---- - - -- -- --- -- Store, e.g., WRITE_ONCE() Y Y @@ -14,7 +14,7 @@ smp_wmb() Y W Y Y W smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y smp_mb__before_atomic() CP Y Y Y a a a a Y -smp_mb__after_atomic() CP a a Y Y Y Y Y +smp_mb__after_atomic() CP a a Y Y Y Y Y Y Key: C: Ordering is cumulative @@ -26,4 +26,5 @@ Key: C: Ordering is cumulative DR: Dependent read (address dependency) DW: Dependent write (address, data, or control dependency) RMW: Atomic read-modify-write operation - SV Same-variable access + SELF: Orders self, as opposed to accesses before and/or after + SV: Orders later accesses to the same variable diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index a727c82bd434..1b09f3175a1f 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model 19. AND THEN THERE WAS ALPHA 20. THE HAPPENS-BEFORE RELATION: hb 21. THE PROPAGATES-BEFORE RELATION: pb - 22. RCU RELATIONS: link, gp-link, rscs-link, and rcu-path + 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb 23. ODDS AND ENDS @@ -1451,8 +1451,8 @@ they execute means that it cannot have cycles. This requirement is the content of the LKMM's "propagation" axiom. -RCU RELATIONS: link, gp-link, rscs-link, and rcu-path ------------------------------------------------------ +RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb +---------------------------------------------------- RCU (Read-Copy-Update) is a powerful synchronization mechanism. It rests on two concepts: grace periods and read-side critical sections. @@ -1509,8 +1509,8 @@ y, which occurs before the end of the critical section, did not propagate to P1 before the end of the grace period, violating the Guarantee. -In the kernel's implementations of RCU, the business about stores -propagating to every CPU is realized by placing strong fences at +In the kernel's implementations of RCU, the requirements for stores +to propagate to every CPU are fulfilled by placing strong fences at suitable places in the RCU-related code. Thus, if a critical section starts before a grace period does then the critical section's CPU will execute an smp_mb() fence after the end of the critical section and @@ -1523,72 +1523,124 @@ executes. What exactly do we mean by saying that a critical section "starts before" or "ends after" a grace period? Some aspects of the meaning are pretty obvious, as in the example above, but the details aren't -entirely clear. The LKMM formalizes this notion by means of a -relation with the unfortunately generic name "link". It is a very -general relation; among other things, X ->link Z includes cases where -X happens-before or is equal to some event Y which is equal to or -comes before Z in the coherence order. Taking Y = Z, this says that -X ->rfe Z implies X ->link Z, and taking Y = X, it says that X ->fr Z -and X ->co Z each imply X ->link Z. - -The formal definition of the link relation is more than a little +entirely clear. The LKMM formalizes this notion by means of the +rcu-link relation. rcu-link encompasses a very general notion of +"before": Among other things, X ->rcu-link Z includes cases where X +happens-before or is equal to some event Y which is equal to or comes +before Z in the coherence order. When Y = Z this says that X ->rfe Z +implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z +and X ->co Z each imply X ->rcu-link Z. + +The formal definition of the rcu-link relation is more than a little obscure, and we won't give it here. It is closely related to the pb relation, and the details don't matter unless you want to comb through a somewhat lengthy formal proof. Pretty much all you need to know -about link is the information in the preceding paragraph. - -The LKMM goes on to define the gp-link and rscs-link relations. They -bring grace periods and read-side critical sections into the picture, -in the following way: - - E ->gp-link F means there is a synchronize_rcu() fence event S - and an event X such that E ->po S, either S ->po X or S = X, - and X ->link F. In other words, E and F are connected by a - grace period followed by an instance of link. - - E ->rscs-link F means there is a critical section delimited by - an rcu_read_lock() fence L and an rcu_read_unlock() fence U, - and an event X such that E ->po U, either L ->po X or L = X, - and X ->link F. Roughly speaking, this says that some event - in the same critical section as E is connected by link to F. - -If we think of the link relation as standing for an extended "before", -then E ->gp-link F says that E executes before a grace period which -ends before F executes. (In fact it says more than this, because it -includes cases where E executes before a grace period and some store -propagates to F's CPU before F executes and doesn't propagate to some -other CPU until after the grace period ends.) Similarly, -E ->rscs-link F says that E is part of (or before the start of) a -critical section which starts before F executes. +about rcu-link is the information in the preceding paragraph. + +The LKMM also defines the gp and rscs relations. They bring grace +periods and read-side critical sections into the picture, in the +following way: + + E ->gp F means there is a synchronize_rcu() fence event S such + that E ->po S and either S ->po F or S = F. In simple terms, + there is a grace period po-between E and F. + + E ->rscs F means there is a critical section delimited by an + rcu_read_lock() fence L and an rcu_read_unlock() fence U, such + that E ->po U and either L ->po F or L = F. You can think of + this as saying that E and F are in the same critical section + (in fact, it also allows E to be po-before the start of the + critical section and F to be po-after the end). + +If we think of the rcu-link relation as standing for an extended +"before", then X ->gp Y ->rcu-link Z says that X executes before a +grace period which ends before Z executes. (In fact it covers more +than this, because it also includes cases where X executes before a +grace period and some store propagates to Z's CPU before Z executes +but doesn't propagate to some other CPU until after the grace period +ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or +before the start of) a critical section which starts before Z +executes. + +The LKMM goes on to define the rcu-fence relation as a sequence of gp +and rscs links separated by rcu-link links, in which the number of gp +links is >= the number of rscs links. For example: + + X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + +would imply that X ->rcu-fence V, because this sequence contains two +gp links and only one rscs link. (It also implies that X ->rcu-fence T +and Z ->rcu-fence V.) On the other hand: + + X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + +does not imply X ->rcu-fence V, because the sequence contains only +one gp link but two rscs links. + +The rcu-fence relation is important because the Grace Period Guarantee +means that rcu-fence acts kind of like a strong fence. In particular, +if W is a write and we have W ->rcu-fence Z, the Guarantee says that W +will propagate to every CPU before Z executes. + +To prove this in full generality requires some intellectual effort. +We'll consider just a very simple case: + + W ->gp X ->rcu-link Y ->rscs Z. + +This formula means that there is a grace period G and a critical +section C such that: + + 1. W is po-before G; + + 2. X is equal to or po-after G; + + 3. X comes "before" Y in some sense; + + 4. Y is po-before the end of C; + + 5. Z is equal to or po-after the start of C. + +From 2 - 4 we deduce that the grace period G ends before the critical +section C. Then the second part of the Grace Period Guarantee says +not only that G starts before C does, but also that W (which executes +on G's CPU before G starts) must propagate to every CPU before C +starts. In particular, W propagates to every CPU before Z executes +(or finishes executing, in the case where Z is equal to the +rcu_read_lock() fence event which starts C.) This sort of reasoning +can be expanded to handle all the situations covered by rcu-fence. + +Finally, the LKMM defines the RCU-before (rb) relation in terms of +rcu-fence. This is done in essentially the same way as the pb +relation was defined in terms of strong-fence. We will omit the +details; the end result is that E ->rb F implies E must execute before +F, just as E ->pb F does (and for much the same reasons). Putting this all together, the LKMM expresses the Grace Period -Guarantee by requiring that there are no cycles consisting of gp-link -and rscs-link connections in which the number of gp-link instances is ->= the number of rscs-link instances. It does this by defining the -rcu-path relation to link events E and F whenever it is possible to -pass from E to F by a sequence of gp-link and rscs-link connections -with at least as many of the former as the latter. The LKMM's "rcu" -axiom then says that there are no events E such that E ->rcu-path E. - -Justifying this axiom takes some intellectual effort, but it is in -fact a valid formalization of the Grace Period Guarantee. We won't -attempt to go through the detailed argument, but the following -analysis gives a taste of what is involved. Suppose we have a -violation of the first part of the Guarantee: A critical section -starts before a grace period, and some store propagates to the -critical section's CPU before the end of the critical section but -doesn't propagate to some other CPU until after the end of the grace -period. +Guarantee by requiring that the rb relation does not contain a cycle. +Equivalently, this "rcu" axiom requires that there are no events E and +F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the +axiom requires that there are no cycles consisting of gp and rscs +alternating with rcu-link, where the number of gp links is >= the +number of rscs links. + +Justifying the axiom isn't easy, but it is in fact a valid +formalization of the Grace Period Guarantee. We won't attempt to go +through the detailed argument, but the following analysis gives a +taste of what is involved. Suppose we have a violation of the first +part of the Guarantee: A critical section starts before a grace +period, and some store propagates to the critical section's CPU before +the end of the critical section but doesn't propagate to some other +CPU until after the end of the grace period. Putting symbols to these ideas, let L and U be the rcu_read_lock() and rcu_read_unlock() fence events delimiting the critical section in question, and let S be the synchronize_rcu() fence event for the grace period. Saying that the critical section starts before S means there are events E and F where E is po-after L (which marks the start of the -critical section), E is "before" F in the sense of the link relation, -and F is po-before the grace period S: +critical section), E is "before" F in the sense of the rcu-link +relation, and F is po-before the grace period S: - L ->po E ->link F ->po S. + L ->po E ->rcu-link F ->po S. Let W be the store mentioned above, let Z come before the end of the critical section and witness that W propagates to the critical @@ -1600,16 +1652,19 @@ some event X which is po-after S. Symbolically, this amounts to: The fr link from Y to W indicates that W has not propagated to Y's CPU at the time that Y executes. From this, it can be shown (see the -discussion of the link relation earlier) that X and Z are connected by -link, yielding: +discussion of the rcu-link relation earlier) that X and Z are related +by rcu-link, yielding: + + S ->po X ->rcu-link Z ->po U. + +The formulas say that S is po-between F and X, hence F ->gp X. They +also say that Z comes before the end of the critical section and E +comes after its start, hence Z ->rscs E. From all this we obtain: - S ->po X ->link Z ->po U. + F ->gp X ->rcu-link Z ->rscs E ->rcu-link F, -These formulas say that S is po-between F and X, hence F ->gp-link Z -via X. They also say that Z comes before the end of the critical -section and E comes after its start, hence Z ->rscs-link F via E. But -now we have a forbidden cycle: F ->gp-link Z ->rscs-link F. Thus the -"rcu" axiom rules out this violation of the Grace Period Guarantee. +a forbidden cycle. Thus the "rcu" axiom rules out this violation of +the Grace Period Guarantee. For something a little more down-to-earth, let's see how the axiom works out in practice. Consider the RCU code example from above, this @@ -1635,18 +1690,18 @@ time with statement labels added to the memory access instructions: } -If r2 = 0 at the end then P0's store at X overwrites the value -that P1's load at Z reads from, so we have Z ->fre X and thus -Z ->link X. In addition, there is a synchronize_rcu() between Y and -Z, so therefore we have Y ->gp-link X. +If r2 = 0 at the end then P0's store at X overwrites the value that +P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X. +In addition, there is a synchronize_rcu() between Y and Z, so therefore +we have Y ->gp Z. If r1 = 1 at the end then P1's load at Y reads from P0's store at W, -so we have W ->link Y. In addition, W and X are in the same critical -section, so therefore we have X ->rscs-link Y. +so we have W ->rcu-link Y. In addition, W and X are in the same critical +section, so therefore we have X ->rscs W. -This gives us a cycle, Y ->gp-link X ->rscs-link Y, with one gp-link -and one rscs-link, violating the "rcu" axiom. Hence the outcome is -not allowed by the LKMM, as we would expect. +Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle, +violating the "rcu" axiom. Hence the outcome is not allowed by the +LKMM, as we would expect. For contrast, let's see what can happen in a more complicated example: @@ -1682,15 +1737,11 @@ For contrast, let's see what can happen in a more complicated example: } If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows -that W ->rscs-link Y via X, Y ->gp-link U via Z, and U ->rscs-link W -via V. And just as before, this gives a cycle: - - W ->rscs-link Y ->gp-link U ->rscs-link W. - -However, this cycle has fewer gp-link instances than rscs-link -instances, and consequently the outcome is not forbidden by the LKMM. -The following instruction timing diagram shows how it might actually -occur: +that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W. +However this cycle is not forbidden, because the sequence of relations +contains fewer instances of gp (one) than of rscs (two). Consequently +the outcome is allowed by the LKMM. The following instruction timing +diagram shows how it might actually occur: P0 P1 P2 -------------------- -------------------- -------------------- diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt index ba2e34c2ec3f..b177f3e4a614 100644 --- a/tools/memory-model/Documentation/references.txt +++ b/tools/memory-model/Documentation/references.txt @@ -63,15 +63,22 @@ o Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis, Principles of Programming Languages (POPL 2017). ACM, New York, NY, USA, 429–442. +o Christopher Pulte, Shaked Flur, Will Deacon, Jon French, + Susmit Sarkar, and Peter Sewell. 2018. "Simplifying ARM concurrency: + multicopy-atomic axiomatic and operational models for ARMv8". In + Proceedings of the ACM on Programming Languages, Volume 2, Issue + POPL, Article No. 19. ACM, New York, NY, USA. + Linux-kernel memory model ========================= -o Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney, - and Jade Alglave. 2017. "A formal model of - Linux-kernel memory ordering - companion webpage". - http://moscova.inria.fr/∼maranget/cats7/linux/. (2017). [Online; - accessed 30-January-2017]. +o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and + Alan Stern. 2018. "Frightening small children and disconcerting + grown-ups: Concurrency in the Linux kernel". In Proceedings of + the 23rd International Conference on Architectural Support for + Programming Languages and Operating Systems (ASPLOS 2018). ACM, + New York, NY, USA, 405-418. Webpage: http://diy.inria.fr/linux/. o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)" diff --git a/tools/memory-model/README b/tools/memory-model/README index 0b3a5f3c9ccd..734f7feaa5dc 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel. REQUIREMENTS ============ -Version 7.48 of the "herd7" and "klitmus7" tools must be downloaded +Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded separately: https://github.com/herd/herdtools7 diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 432c7cf71b23..64f5740e0e75 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -5,10 +5,10 @@ * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, * Andrea Parri <parri.andrea@gmail.com> * - * An earlier version of this file appears in the companion webpage for + * An earlier version of this file appeared in the companion webpage for * "Frightening small children and disconcerting grown-ups: Concurrency * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, - * which is to appear in ASPLOS 2018. + * which appeared in ASPLOS 2018. *) "Linux-kernel memory consistency model" diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index df97db03b6c2..59b5cbe6b624 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -5,10 +5,10 @@ * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, * Andrea Parri <parri.andrea@gmail.com> * - * An earlier version of this file appears in the companion webpage for + * An earlier version of this file appeared in the companion webpage for * "Frightening small children and disconcerting grown-ups: Concurrency * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, - * which is to appear in ASPLOS 2018. + * which appeared in ASPLOS 2018. *) "Linux-kernel memory consistency model" @@ -100,22 +100,29 @@ let rscs = po ; crit^-1 ; po? * one but two non-rf relations, but only in conjunction with an RCU * read-side critical section. *) -let link = hb* ; pb* ; prop +let rcu-link = hb* ; pb* ; prop -(* Chains that affect the RCU grace-period guarantee *) -let gp-link = gp ; link -let rscs-link = rscs ; link +(* + * Any sequence containing at least as many grace periods as RCU read-side + * critical sections (joined by rcu-link) acts as a generalized strong fence. + *) +let rec rcu-fence = gp | + (gp ; rcu-link ; rscs) | + (rscs ; rcu-link ; gp) | + (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) | + (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) | + (rcu-fence ; rcu-link ; rcu-fence) + +(* rb orders instructions just as pb does *) +let rb = prop ; rcu-fence ; hb* ; pb* + +irreflexive rb as rcu (* - * A cycle containing at least as many grace periods as RCU read-side - * critical sections is forbidden. + * The happens-before, propagation, and rcu constraints are all + * expressions of temporal ordering. They could be replaced by + * a single constraint on an "executes-before" relation, xb: + * + * let xb = hb | pb | rb + * acyclic xb as executes-before *) -let rec rcu-path = - gp-link | - (gp-link ; rscs-link) | - (rscs-link ; gp-link) | - (rcu-path ; rcu-path) | - (gp-link ; rcu-path ; rscs-link) | - (rscs-link ; rcu-path ; gp-link) - -irreflexive rcu-path as rcu diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index 397e4e67e8c8..6fa3eb28d40b 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0+ // -// An earlier version of this file appears in the companion webpage for +// An earlier version of this file appeared in the companion webpage for // "Frightening small children and disconcerting grown-ups: Concurrency // in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, -// which is to appear in ASPLOS 2018. +// which appeared in ASPLOS 2018. // ONCE READ_ONCE(X) __load{once}(X) @@ -14,14 +14,15 @@ smp_store_release(X,V) { __store{release}(*X,V); } smp_load_acquire(X) __load{acquire}(*X) rcu_assign_pointer(X,V) { __store{release}(X,V); } rcu_dereference(X) __load{once}(X) +smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; } // Fences -smp_mb() { __fence{mb} ; } -smp_rmb() { __fence{rmb} ; } -smp_wmb() { __fence{wmb} ; } -smp_mb__before_atomic() { __fence{before-atomic} ; } -smp_mb__after_atomic() { __fence{after-atomic} ; } -smp_mb__after_spinlock() { __fence{after-spinlock} ; } +smp_mb() { __fence{mb}; } +smp_rmb() { __fence{rmb}; } +smp_wmb() { __fence{wmb}; } +smp_mb__before_atomic() { __fence{before-atomic}; } +smp_mb__after_atomic() { __fence{after-atomic}; } +smp_mb__after_spinlock() { __fence{after-spinlock}; } // Exchange xchg(X,V) __xchg{mb}(X,V) @@ -34,26 +35,27 @@ cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W) cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W) // Spinlocks -spin_lock(X) { __lock(X) ; } -spin_unlock(X) { __unlock(X) ; } +spin_lock(X) { __lock(X); } +spin_unlock(X) { __unlock(X); } spin_trylock(X) __trylock(X) +spin_is_locked(X) __islocked(X) // RCU rcu_read_lock() { __fence{rcu-lock}; } -rcu_read_unlock() { __fence{rcu-unlock};} +rcu_read_unlock() { __fence{rcu-unlock}; } synchronize_rcu() { __fence{sync-rcu}; } synchronize_rcu_expedited() { __fence{sync-rcu}; } // Atomic atomic_read(X) READ_ONCE(*X) -atomic_set(X,V) { WRITE_ONCE(*X,V) ; } +atomic_set(X,V) { WRITE_ONCE(*X,V); } atomic_read_acquire(X) smp_load_acquire(X) atomic_set_release(X,V) { smp_store_release(X,V); } -atomic_add(V,X) { __atomic_op(X,+,V) ; } -atomic_sub(V,X) { __atomic_op(X,-,V) ; } -atomic_inc(X) { __atomic_op(X,+,1) ; } -atomic_dec(X) { __atomic_op(X,-,1) ; } +atomic_add(V,X) { __atomic_op(X,+,V); } +atomic_sub(V,X) { __atomic_op(X,-,V); } +atomic_inc(X) { __atomic_op(X,+,1); } +atomic_dec(X) { __atomic_op(X,-,1); } atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V) atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V) diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore new file mode 100644 index 000000000000..6e2ddc54152f --- /dev/null +++ b/tools/memory-model/litmus-tests/.gitignore @@ -0,0 +1 @@ +*.litmus.out diff --git a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus index 50d5db9ea983..98a3716efa37 100644 --- a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus +++ b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus @@ -7,7 +7,7 @@ C IRIW+mbonceonces+OnceOnce * between each pairs of reads. In other words, is smp_mb() sufficient to * cause two different reading processes to agree on the order of a pair * of writes, where each write is to a different variable by a different - * process? + * process? This litmus test exercises LKMM's "propagation" rule. *) {} diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus new file mode 100644 index 000000000000..50f4d62bbf0e --- /dev/null +++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus @@ -0,0 +1,35 @@ +C MP+polockmbonce+poacquiresilsil + +(* + * Result: Never + * + * Do spinlocks combined with smp_mb__after_spinlock() provide order + * to outside observers using spin_is_locked() to sense the lock-held + * state, ordered by acquire? Note that when the first spin_is_locked() + * returns false and the second true, we know that the smp_load_acquire() + * executed before the lock was acquired (loosely speaking). + *) + +{ +} + +P0(spinlock_t *lo, int *x) +{ + spin_lock(lo); + smp_mb__after_spinlock(); + WRITE_ONCE(*x, 1); + spin_unlock(lo); +} + +P1(spinlock_t *lo, int *x) +{ + int r1; + int r2; + int r3; + + r1 = smp_load_acquire(x); + r2 = spin_is_locked(lo); + r3 = spin_is_locked(lo); +} + +exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus new file mode 100644 index 000000000000..abf81e7a0895 --- /dev/null +++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus @@ -0,0 +1,34 @@ +C MP+polockonce+poacquiresilsil + +(* + * Result: Sometimes + * + * Do spinlocks provide order to outside observers using spin_is_locked() + * to sense the lock-held state, ordered by acquire? Note that when the + * first spin_is_locked() returns false and the second true, we know that + * the smp_load_acquire() executed before the lock was acquired (loosely + * speaking). + *) + +{ +} + +P0(spinlock_t *lo, int *x) +{ + spin_lock(lo); + WRITE_ONCE(*x, 1); + spin_unlock(lo); +} + +P1(spinlock_t *lo, int *x) +{ + int r1; + int r2; + int r3; + + r1 = smp_load_acquire(x); + r2 = spin_is_locked(lo); + r3 = spin_is_locked(lo); +} + +exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README index 04096fb8b8d9..17eb9a8c222d 100644 --- a/tools/memory-model/litmus-tests/README +++ b/tools/memory-model/litmus-tests/README @@ -23,7 +23,8 @@ IRIW+mbonceonces+OnceOnce.litmus between each pairs of reads. In other words, is smp_mb() sufficient to cause two different reading processes to agree on the order of a pair of writes, where each write is to a different - variable by a different process? + variable by a different process? This litmus test is forbidden + by LKMM's propagation rule. IRIW+poonceonces+OnceOnce.litmus Test of independent reads from independent writes with nothing @@ -63,6 +64,16 @@ LB+poonceonces.litmus MP+onceassign+derefonce.litmus As below, but with rcu_assign_pointer() and an rcu_dereference(). +MP+polockmbonce+poacquiresilsil.litmus + Protect the access with a lock and an smp_mb__after_spinlock() + in one process, and use an acquire load followed by a pair of + spin_is_locked() calls in the other process. + +MP+polockonce+poacquiresilsil.litmus + Protect the access with a lock in one process, and use an + acquire load followed by a pair of spin_is_locked() calls + in the other process. + MP+polocks.litmus As below, but with the second access of the writer process and the first access of reader process protected by a lock. @@ -109,8 +120,10 @@ S+wmbonceonce+poacquireonce.litmus WRC+poonceonces+Once.litmus WRC+pooncerelease+rmbonceonce+Once.litmus - These two are members of an extension of the MP litmus-test class - in which the first write is moved to a separate process. + These two are members of an extension of the MP litmus-test + class in which the first write is moved to a separate process. + The second is forbidden because smp_store_release() is + A-cumulative in LKMM. Z6.0+pooncelock+pooncelock+pombonce.litmus Is the ordering provided by a spin_unlock() and a subsequent diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus index 97fcbffde9a0..ad3448b941e6 100644 --- a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus @@ -5,7 +5,9 @@ C WRC+pooncerelease+rmbonceonce+Once * * This litmus test is an extension of the message-passing pattern, where * the first write is moved to a separate process. Because it features - * a release and a read memory barrier, it should be forbidden. + * a release and a read memory barrier, it should be forbidden. More + * specifically, this litmus test is forbidden because smp_store_release() + * is A-cumulative in LKMM. *) {} diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index ba4a4ec6d313..305ded17e741 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -4,46 +4,72 @@ * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu> *) -(* Generate coherence orders and handle lock operations *) +(* + * Generate coherence orders and handle lock operations + * + * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48. + * spin_is_locked() is functional from herd7 version 7.49. + *) include "cross.cat" -(* From lock reads to their partner lock writes *) -let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po) -let rmw = rmw | lk-rmw - (* - * A paired LKR must always see an unlocked value; spin_lock() calls nested - * inside a critical section (for the same lock) always deadlock. + * The lock-related events generated by herd are as follows: + * + * LKR Lock-Read: the read part of a spin_lock() or successful + * spin_trylock() read-modify-write event pair + * LKW Lock-Write: the write part of a spin_lock() or successful + * spin_trylock() RMW event pair + * UL Unlock: a spin_unlock() event + * LF Lock-Fail: a failed spin_trylock() event + * RL Read-Locked: a spin_is_locked() event which returns True + * RU Read-Unlocked: a spin_is_locked() event which returns False + * + * LKR and LKW events always come paired, like all RMW event sequences. + * + * LKR, LF, RL, and RU are read events; LKR has Acquire ordering. + * LKW and UL are write events; UL has Release ordering. + * LKW, LF, RL, and RU have no ordering properties. *) -empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc) - as lock-nest -(* The litmus test is invalid if an LKW event is not part of an RMW pair *) -flag ~empty LKW \ range(lk-rmw) as unpaired-LKW +(* Backward compatibility *) +let RL = try RL with emptyset +let RU = try RU with emptyset -(* This will be allowed if we implement spin_is_locked() *) -flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR +(* Treat RL as a kind of LF: a read with no ordering properties *) +let LF = LF | RL -(* There should be no R or W accesses to spinlocks *) -let ALL-LOCKS = LKR | LKW | UL | LF +(* There should be no ordinary R or W accesses to spinlocks *) +let ALL-LOCKS = LKR | LKW | UL | LF | RU flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses +(* Link Lock-Reads to their RMW-partner Lock-Writes *) +let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po) +let rmw = rmw | lk-rmw + +(* The litmus test is invalid if an LKR/LKW event is not part of an RMW pair *) +flag ~empty LKW \ range(lk-rmw) as unpaired-LKW +flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR + +(* + * An LKR must always see an unlocked value; spin_lock() calls nested + * inside a critical section (for the same lock) always deadlock. + *) +empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest + (* The final value of a spinlock should not be tested *) flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final - (* * Put lock operations in their appropriate classes, but leave UL out of W * until after the co relation has been generated. *) -let R = R | LKR | LF +let R = R | LKR | LF | RU let W = W | LKW let Release = Release | UL let Acquire = Acquire | LKR - (* Match LKW events to their corresponding UL events *) let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc) @@ -53,27 +79,48 @@ flag ~empty UL \ range(critical) as unmatched-unlock let UNMATCHED-LKW = LKW \ domain(critical) empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks - (* rfi for LF events: link each LKW to the LF events in its critical section *) let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) (* rfe for LF events *) let all-possible-rfe-lf = - (* - * Given an LF event r, compute the possible rfe edges for that event - * (all those starting from LKW events in other threads), - * and then convert that relation to a set of single-edge relations. - *) - let possible-rfe-lf r = - let pair-to-relation p = p ++ 0 - in map pair-to-relation ((LKW * {r}) & loc & ext) - (* Do this for each LF event r that isn't in rfi-lf *) - in map possible-rfe-lf (LF \ range(rfi-lf)) + (* + * Given an LF event r, compute the possible rfe edges for that event + * (all those starting from LKW events in other threads), + * and then convert that relation to a set of single-edge relations. + *) + let possible-rfe-lf r = + let pair-to-relation p = p ++ 0 + in map pair-to-relation ((LKW * {r}) & loc & ext) + (* Do this for each LF event r that isn't in rfi-lf *) + in map possible-rfe-lf (LF \ range(rfi-lf)) (* Generate all rf relations for LF events *) with rfe-lf from cross(all-possible-rfe-lf) -let rf = rf | rfi-lf | rfe-lf +let rf-lf = rfe-lf | rfi-lf + +(* + * RU, i.e., spin_is_locked() returning False, is slightly different. + * We rely on the memory model to rule out cases where spin_is_locked() + * within one of the lock's critical sections returns False. + *) + +(* rfi for RU events: an RU may read from the last po-previous UL *) +let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc) + +(* rfe for RU events: an RU may read from an external UL or the initial write *) +let all-possible-rfe-ru = + let possible-rfe-ru r = + let pair-to-relation p = p ++ 0 + in map pair-to-relation (((UL | IW) * {r}) & loc & ext) + in map possible-rfe-ru RU + +(* Generate all rf relations for RU events *) +with rfe-ru from cross(all-possible-rfe-ru) +let rf-ru = rfe-ru | rfi-ru +(* Final rf relation *) +let rf = rf | rf-lf | rf-ru (* Generate all co relations, including LKW events but not UL *) let co0 = co0 | ([IW] ; loc ; [LKW]) | diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh new file mode 100644 index 000000000000..af0aa15ab84e --- /dev/null +++ b/tools/memory-model/scripts/checkalllitmus.sh @@ -0,0 +1,73 @@ +#!/bin/sh +# +# Run herd tests on all .litmus files in the specified directory (which +# defaults to litmus-tests) and check each file's result against a "Result:" +# comment within that litmus test. If the verification result does not +# match that specified in the litmus test, this script prints an error +# message prefixed with "^^^". It also outputs verification results to +# a file whose name is that of the specified litmus test, but with ".out" +# appended. +# +# Usage: +# sh checkalllitmus.sh [ directory ] +# +# The LINUX_HERD_OPTIONS environment variable may be used to specify +# arguments to herd, whose default is defined by the checklitmus.sh script. +# Thus, one would normally run this in the directory containing the memory +# model, specifying the pathname of the litmus test to check. +# +# This script makes no attempt to run the litmus tests concurrently. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright IBM Corporation, 2018 +# +# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + +litmusdir=${1-litmus-tests} +if test -d "$litmusdir" -a -r "$litmusdir" -a -x "$litmusdir" +then + : +else + echo ' --- ' error: $litmusdir is not an accessible directory + exit 255 +fi + +# Find the checklitmus script. If it is not where we expect it, then +# assume that the caller has the PATH environment variable set +# appropriately. +if test -x scripts/checklitmus.sh +then + clscript=scripts/checklitmus.sh +else + clscript=checklitmus.sh +fi + +# Run the script on all the litmus tests in the specified directory +ret=0 +for i in litmus-tests/*.litmus +do + if ! $clscript $i + then + ret=1 + fi +done +if test "$ret" -ne 0 +then + echo " ^^^ VERIFICATION MISMATCHES" +else + echo All litmus tests verified as was expected. +fi +exit $ret diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh new file mode 100644 index 000000000000..e2e477472844 --- /dev/null +++ b/tools/memory-model/scripts/checklitmus.sh @@ -0,0 +1,86 @@ +#!/bin/sh +# +# Run a herd test and check the result against a "Result:" comment within +# the litmus test. If the verification result does not match that specified +# in the litmus test, this script prints an error message prefixed with +# "^^^" and exits with a non-zero status. It also outputs verification +# results to a file whose name is that of the specified litmus test, but +# with ".out" appended. +# +# Usage: +# sh checklitmus.sh file.litmus +# +# The LINUX_HERD_OPTIONS environment variable may be used to specify +# arguments to herd, which default to "-conf linux-kernel.cfg". Thus, +# one would normally run this in the directory containing the memory model, +# specifying the pathname of the litmus test to check. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright IBM Corporation, 2018 +# +# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + +litmus=$1 +herdoptions=${LINUX_HERD_OPTIONS--conf linux-kernel.cfg} + +if test -f "$litmus" -a -r "$litmus" +then + : +else + echo ' --- ' error: \"$litmus\" is not a readable file + exit 255 +fi +if grep -q '^ \* Result: ' $litmus +then + outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` +else + outcome=specified +fi + +echo Herd options: $herdoptions > $litmus.out +/usr/bin/time herd7 -o ~/tmp $herdoptions $litmus >> $litmus.out 2>&1 +grep "Herd options:" $litmus.out +grep '^Observation' $litmus.out +if grep -q '^Observation' $litmus.out +then + : +else + cat $litmus.out + echo ' ^^^ Verification error' + echo ' ^^^ Verification error' >> $litmus.out 2>&1 + exit 255 +fi +if test "$outcome" = DEADLOCK +then + echo grep 3 and 4 + if grep '^Observation' $litmus.out | grep -q 'Never 0 0$' + then + ret=0 + else + echo " ^^^ Unexpected non-$outcome verification" + echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1 + ret=1 + fi +elif grep '^Observation' $litmus.out | grep -q $outcome || test "$outcome" = Maybe +then + ret=0 +else + echo " ^^^ Unexpected non-$outcome verification" + echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1 + ret=1 +fi +tail -2 $litmus.out | head -1 +exit $ret diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3a31b238f885..f4a25bd1871f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -164,6 +164,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "lbug_with_loc", "fortify_panic", "usercopy_abort", + "machine_real_restart", }; if (func->bind == STB_WEAK) @@ -543,6 +544,28 @@ static int add_jump_destinations(struct objtool_file *file) dest_off); return -1; } + + /* + * For GCC 8+, create parent/child links for any cold + * subfunctions. This is _mostly_ redundant with a similar + * initialization in read_symbols(). + * + * If a function has aliases, we want the *first* such function + * in the symbol table to be the subfunction's parent. In that + * case we overwrite the initialization done in read_symbols(). + * + * However this code can't completely replace the + * read_symbols() code because this doesn't detect the case + * where the parent function's only reference to a subfunction + * is through a switch table. + */ + if (insn->func && insn->jump_dest->func && + insn->func != insn->jump_dest->func && + !strstr(insn->func->name, ".cold.") && + strstr(insn->jump_dest->func->name, ".cold.")) { + insn->func->cfunc = insn->jump_dest->func; + insn->jump_dest->func->pfunc = insn->func; + } } return 0; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4e60e105583e..7ec85d567598 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -302,19 +302,34 @@ static int read_symbols(struct elf *elf) continue; sym->pfunc = sym->cfunc = sym; coldstr = strstr(sym->name, ".cold."); - if (coldstr) { - coldstr[0] = '\0'; - pfunc = find_symbol_by_name(elf, sym->name); - coldstr[0] = '.'; - - if (!pfunc) { - WARN("%s(): can't find parent function", - sym->name); - goto err; - } - - sym->pfunc = pfunc; - pfunc->cfunc = sym; + if (!coldstr) + continue; + + coldstr[0] = '\0'; + pfunc = find_symbol_by_name(elf, sym->name); + coldstr[0] = '.'; + + if (!pfunc) { + WARN("%s(): can't find parent function", + sym->name); + goto err; + } + + sym->pfunc = pfunc; + pfunc->cfunc = sym; + + /* + * Unfortunately, -fnoreorder-functions puts the child + * inside the parent. Remove the overlap so we can + * have sane assumptions. + * + * Note that pfunc->len now no longer matches + * pfunc->sym.st_size. + */ + if (sym->sec == pfunc->sec && + sym->offset >= pfunc->offset && + sym->offset + sym->len == pfunc->offset + pfunc->len) { + pfunc->len -= sym->len; } } } @@ -504,10 +519,12 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_flags = SHF_ALLOC; - /* Add section name to .shstrtab */ + /* Add section name to .shstrtab (or .strtab for Clang) */ shstrtab = find_section_by_name(elf, ".shstrtab"); + if (!shstrtab) + shstrtab = find_section_by_name(elf, ".strtab"); if (!shstrtab) { - WARN("can't find .shstrtab section"); + WARN("can't find .shstrtab or .strtab section"); return NULL; } diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index db11478e30b4..42261a9b280e 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -47,7 +47,8 @@ man5dir=$(mandir)/man5 man7dir=$(mandir)/man7 ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe +ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf +ASCIIDOC_HTML = xhtml11 MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = INSTALL?=install @@ -55,6 +56,14 @@ RM ?= rm -f DOC_REF = origin/man HTML_REF = origin/html +ifdef USE_ASCIIDOCTOR +ASCIIDOC = asciidoctor +ASCIIDOC_EXTRA = -a compat-mode +ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions +ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual" +ASCIIDOC_HTML = xhtml5 +endif + infodir?=$(prefix)/share/info MAKEINFO=makeinfo INSTALL_INFO=install-info @@ -73,10 +82,12 @@ ifeq ($(_tmp_tool_path),) missing_tools = $(ASCIIDOC) endif +ifndef USE_ASCIIDOCTOR _tmp_tool_path := $(call get-executable,$(XMLTO)) ifeq ($(_tmp_tool_path),) missing_tools += $(XMLTO) endif +endif # # For asciidoc ... @@ -264,9 +275,17 @@ clean: $(MAN_HTML): $(OUTPUT)%.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ + $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ + mv $@+ $@ + +ifdef USE_ASCIIDOCTOR +$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b manpage -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ mv $@+ $@ +endif $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(QUIET_XMLTO)$(RM) $@ && \ @@ -274,7 +293,7 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ + $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ mv $@+ $@ @@ -321,13 +340,13 @@ howto-index.txt: howto-index.sh $(wildcard howto/*.txt) mv $@+ $@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt - $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt + $(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt WEBDOC_DEST = /pub/software/tools/perf/docs $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ + sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \ mv $@+ $@ # UNIMPLEMENTED diff --git a/tools/perf/Documentation/asciidoctor-extensions.rb b/tools/perf/Documentation/asciidoctor-extensions.rb new file mode 100644 index 000000000000..d148fe95c0c4 --- /dev/null +++ b/tools/perf/Documentation/asciidoctor-extensions.rb @@ -0,0 +1,29 @@ +require 'asciidoctor' +require 'asciidoctor/extensions' + +module Perf + module Documentation + class LinkPerfProcessor < Asciidoctor::Extensions::InlineMacroProcessor + use_dsl + + named :chrome + + def process(parent, target, attrs) + if parent.document.basebackend? 'html' + %(<a href="#{target}.html">#{target}(#{attrs[1]})</a>\n) + elsif parent.document.basebackend? 'manpage' + "#{target}(#{attrs[1]})" + elsif parent.document.basebackend? 'docbook' + "<citerefentry>\n" \ + "<refentrytitle>#{target}</refentrytitle>" \ + "<manvolnum>#{attrs[1]}</manvolnum>\n" \ + "</citerefentry>\n" + end + end + end + end +end + +Asciidoctor::Extensions.register do + inline_macro Perf::Documentation::LinkPerfProcessor, :linkperf +end diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 73c2650bd0db..f6de0952ff3c 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -48,6 +48,9 @@ OPTIONS --purge=:: Purge all cached binaries including older caches which have specified path from the cache. +-P:: +--purge-all:: + Purge all cached binaries. This will flush out entire cache. -M:: --missing=:: List missing build ids in the cache for the specified file. @@ -59,7 +62,9 @@ OPTIONS exactly same build-id, that is replaced by new one. It can be used to update kallsyms and kernel dso to vmlinux in order to support annotation. - +-l:: +--list:: + List all valid binaries from cache. -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 2549c34a7895..11300dbe35c5 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -124,7 +124,11 @@ The available PMUs and their raw parameters can be listed with For example the raw event "LSD.UOPS" core pmu event above could be specified as - perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ... + perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=0x1/ ... + + or using extended name syntax + + perf stat -e cpu/event=0xa8,umask=0x1,cmask=0x1,name=\'LSD.UOPS_CYCLES:cmask=0x1\'/ ... PER SOCKET PMUS --------------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index cc37b3a4be76..04168da4268e 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -57,6 +57,9 @@ OPTIONS FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and "no" for disable callgraph. - 'stack-size': user stack size for dwarf mode + - 'name' : User defined event name. Single quotes (') may be used to + escape symbols in the name from parsing by shell and tool + like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'. See the linkperf:perf-list[1] man page for more parameters. diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 51ec2d20068a..0fb9eda3cbca 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -610,6 +610,32 @@ Various utility functions for use with perf script: nsecs_str(nsecs) - returns printable string in the form secs.nsecs avg(total, n) - returns average given a sum and a total number of values +SUPPORTED FIELDS +---------------- + +Currently supported fields: + +ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr, +symbol, dso, time_enabled, time_running, values, callchain, +brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs, +weight, transaction, raw_buf, attr. + +Some fields have sub items: + +brstack: + from, to, from_dsoname, to_dsoname, mispred, + predicted, in_tx, abort, cycles. + +brstacksym: + items: from, to, pred, in_tx, abort (converted string) + +For example, +We can use this code to print brstack "from", "to", "cycles". + +if 'brstack' in dict: + for entry in dict['brstack']: + print "from %s, to %s, cycles %s" % (entry["from"], entry["to"], entry["cycles"]) + SEE ALSO -------- linkperf:perf-script[1] diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index e6c3b4e555c2..b10a90b6a718 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -116,6 +116,22 @@ Do not aggregate counts across all monitored CPUs. print counts using a CSV-style output to make it easy to import directly into spreadsheets. Columns are separated by the string specified in SEP. +--table:: Display time for each run (-r option), in a table format, e.g.: + + $ perf stat --null -r 5 --table perf bench sched pipe + + Performance counter stats for 'perf bench sched pipe' (5 runs): + + # Table of individual measurements: + 5.189 (-0.293) # + 5.189 (-0.294) # + 5.186 (-0.296) # + 5.663 (+0.181) ## + 6.186 (+0.703) #### + + # Final result: + 5.483 +- 0.198 seconds time elapsed ( +- 3.62% ) + -G name:: --cgroup name:: monitor only in the container (cgroup) called "name". This option is available only @@ -162,6 +178,9 @@ Print count deltas for fixed number of times. This option should be used together with "-I" option. example: 'perf stat -I 1000 --interval-count 2 -e cycles -a' +--interval-clear:: +Clear the screen before next interval. + --timeout msecs:: Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms). This option is not supported with the "-I" option. @@ -294,20 +313,38 @@ Users who wants to get the actual value can apply --no-metric-only. EXAMPLES -------- -$ perf stat -- make -j +$ perf stat -- make + + Performance counter stats for 'make': + + 83723.452481 task-clock:u (msec) # 1.004 CPUs utilized + 0 context-switches:u # 0.000 K/sec + 0 cpu-migrations:u # 0.000 K/sec + 3,228,188 page-faults:u # 0.039 M/sec + 229,570,665,834 cycles:u # 2.742 GHz + 313,163,853,778 instructions:u # 1.36 insn per cycle + 69,704,684,856 branches:u # 832.559 M/sec + 2,078,861,393 branch-misses:u # 2.98% of all branches + + 83.409183620 seconds time elapsed + + 74.684747000 seconds user + 8.739217000 seconds sys + +TIMINGS +------- +As displayed in the example above we can display 3 types of timings. +We always display the time the counters were enabled/alive: + + 83.409183620 seconds time elapsed - Performance counter stats for 'make -j': +For workload sessions we also display time the workloads spent in +user/system lands: - 8117.370256 task clock ticks # 11.281 CPU utilization factor - 678 context switches # 0.000 M/sec - 133 CPU migrations # 0.000 M/sec - 235724 pagefaults # 0.029 M/sec - 24821162526 CPU cycles # 3057.784 M/sec - 18687303457 instructions # 2302.138 M/sec - 172158895 cache references # 21.209 M/sec - 27075259 cache misses # 3.335 M/sec + 74.684747000 seconds user + 8.739217000 seconds sys - Wall-clock time elapsed: 719.554352 msecs +Those times are the very same as displayed by the 'time' tool. CSV FORMAT ---------- diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index d00f0d51cab8..dfb218feaad9 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -111,8 +111,8 @@ A perf_header_string with the CPU architecture (uname -m) A structure defining the number of CPUs. struct nr_cpus { - uint32_t nr_cpus_online; uint32_t nr_cpus_available; /* CPUs not yet onlined */ + uint32_t nr_cpus_online; }; HEADER_CPUDESC = 8, @@ -153,10 +153,18 @@ struct { HEADER_CPU_TOPOLOGY = 13, String lists defining the core and CPU threads topology. +The string lists are followed by a variable length array +which contains core_id and socket_id of each cpu. +The number of entries can be determined by the size of the +section minus the sizes of both string lists. struct { struct perf_header_string_list cores; /* Variable length */ struct perf_header_string_list threads; /* Variable length */ + struct { + uint32_t core_id; + uint32_t socket_id; + } cpus[nr]; /* Variable length records */ }; Example: diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index ae7dc46e8f8a..f5a3b402589e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -207,8 +207,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) + PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif @@ -885,6 +884,8 @@ endif # Among the variables below, these: # perfexecdir +# perf_include_dir +# perf_examples_dir # template_dir # mandir # infodir @@ -904,6 +905,8 @@ bindir = $(abspath $(prefix)/$(bindir_relative)) mandir = share/man infodir = share/info perfexecdir = libexec/perf-core +perf_include_dir = lib/include/perf +perf_examples_dir = lib/examples/perf sharedir = $(prefix)/share template_dir = share/perf-core/templates STRACE_GROUPS_DIR = share/perf-core/strace/groups @@ -934,6 +937,8 @@ bindir_SQ = $(subst ','\'',$(bindir)) mandir_SQ = $(subst ','\'',$(mandir)) infodir_SQ = $(subst ','\'',$(infodir)) perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) +perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir)) +perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) tipdir_SQ = $(subst ','\'',$(tipdir)) @@ -944,14 +949,20 @@ srcdir_SQ = $(subst ','\'',$(srcdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) +perf_include_instdir = $(perf_include_dir) +perf_examples_instdir = $(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) tip_instdir = $(tipdir) else perfexec_instdir = $(prefix)/$(perfexecdir) +perf_include_instdir = $(prefix)/$(perf_include_dir) +perf_examples_instdir = $(prefix)/$(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) tip_instdir = $(prefix)/$(tipdir) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) +perf_include_instdir_SQ = $(subst ','\'',$(perf_include_instdir)) +perf_examples_instdir_SQ = $(subst ','\'',$(perf_examples_instdir)) STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR)) tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) @@ -999,6 +1010,8 @@ $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) +$(call detected_var,perf_include_dir_SQ) +$(call detected_var,perf_examples_dir_SQ) $(call detected_var,tipdir_SQ) $(call detected_var,srcdir_SQ) $(call detected_var,LIBDIR) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 83e453de36f8..ecc9fc952655 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -767,6 +767,16 @@ ifndef NO_JVMTI endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' +ifndef NO_LIBBPF + $(call QUIET_INSTALL, lib) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' + $(call QUIET_INSTALL, include/bpf) \ + $(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' + $(call QUIET_INSTALL, lib) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' + $(call QUIET_INSTALL, examples/bpf) \ + $(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' +endif $(call QUIET_INSTALL, perf-archive) \ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(call QUIET_INSTALL, perf-with-kcore) \ diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c index 8cb347760233..9a0242e74cfc 100644 --- a/tools/perf/arch/arm/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm/tests/dwarf-unwind.c @@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_ARM_SP]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c index e907f0f4c20c..5522ce384723 100644 --- a/tools/perf/arch/arm64/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c @@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_ARM64_SP]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index c6f373508a4f..82657c01a3b8 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -189,7 +189,7 @@ out_error: return -1; } -int perf_env__lookup_objdump(struct perf_env *env) +int perf_env__lookup_objdump(struct perf_env *env, const char **path) { /* * For live mode, env->arch will be NULL and we can use @@ -198,5 +198,5 @@ int perf_env__lookup_objdump(struct perf_env *env) if (env->arch == NULL) return 0; - return perf_env__lookup_binutils_path(env, "objdump", &objdump_path); + return perf_env__lookup_binutils_path(env, "objdump", path); } diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index 2d875baa92e6..2167001b18c5 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -4,8 +4,6 @@ #include "../util/env.h" -extern const char *objdump_path; - -int perf_env__lookup_objdump(struct perf_env *env); +int perf_env__lookup_objdump(struct perf_env *env, const char **path); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c index 30cbbd6d5be0..5f39efef0856 100644 --- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_POWERPC_R1]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 0c370f81e002..ef5d59a5742e 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -243,13 +243,12 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) u64 ip; u64 skip_slot = -1; - if (chain->nr < 3) + if (!chain || chain->nr < 3) return skip_slot; ip = chain->ips[2]; - thread__find_addr_location(thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); + thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al); if (al.map) dso = al.map->dso; diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 4dfe42666d0c..f0b1709a5ffb 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -341,6 +341,8 @@ 330 common pkey_alloc __x64_sys_pkey_alloc 331 common pkey_free __x64_sys_pkey_free 332 common statx __x64_sys_statx +333 common io_pgetevents __x64_sys_io_pgetevents +334 common rseq __x64_sys_rseq # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 95036c7a59e8..7879df34569a 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_X86_SP]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index f95e6f46ef0d..844b8f335532 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -4,6 +4,8 @@ libperf-y += pmu.o libperf-y += kvm-stat.o libperf-y += perf_regs.o libperf-y += group.o +libperf-y += machine.o +libperf-y += event.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c new file mode 100644 index 000000000000..675a0213044d --- /dev/null +++ b/tools/perf/arch/x86/util/event.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> + +#include "../../util/machine.h" +#include "../../util/tool.h" +#include "../../util/map.h" +#include "../../util/util.h" +#include "../../util/debug.h" + +#if defined(__x86_64__) + +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int rc = 0; + struct map *pos; + struct map_groups *kmaps = &machine->kmaps; + struct maps *maps = &kmaps->maps; + union perf_event *event = zalloc(sizeof(event->mmap) + + machine->id_hdr_size); + + if (!event) { + pr_debug("Not enough memory synthesizing mmap event " + "for extra kernel maps\n"); + return -1; + } + + for (pos = maps__first(maps); pos; pos = map__next(pos)) { + struct kmap *kmap; + size_t size; + + if (!__map__is_extra_kernel_map(pos)) + continue; + + kmap = map__kmap(pos); + + size = sizeof(event->mmap) - sizeof(event->mmap.filename) + + PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + + machine->id_hdr_size; + + memset(event, 0, size); + + event->mmap.header.type = PERF_RECORD_MMAP; + + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + + event->mmap.header.size = size; + + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pgoff = pos->pgoff; + event->mmap.pid = machine->pid; + + strlcpy(event->mmap.filename, kmap->name, PATH_MAX); + + if (perf_tool__process_synth_event(tool, event, machine, + process) != 0) { + rc = -1; + break; + } + } + + free(event); + return rc; +} + +#endif diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c new file mode 100644 index 000000000000..4520ac53caa9 --- /dev/null +++ b/tools/perf/arch/x86/util/machine.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> +#include <stdlib.h> + +#include "../../util/machine.h" +#include "../../util/map.h" +#include "../../util/symbol.h" +#include "../../util/sane_ctype.h" + +#include <symbol/kallsyms.h> + +#if defined(__x86_64__) + +struct extra_kernel_map_info { + int cnt; + int max_cnt; + struct extra_kernel_map *maps; + bool get_entry_trampolines; + u64 entry_trampoline; +}; + +static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start, + u64 end, u64 pgoff, const char *name) +{ + if (mi->cnt >= mi->max_cnt) { + void *buf; + size_t sz; + + mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32; + sz = sizeof(struct extra_kernel_map) * mi->max_cnt; + buf = realloc(mi->maps, sz); + if (!buf) + return -1; + mi->maps = buf; + } + + mi->maps[mi->cnt].start = start; + mi->maps[mi->cnt].end = end; + mi->maps[mi->cnt].pgoff = pgoff; + strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN); + + mi->cnt += 1; + + return 0; +} + +static int find_extra_kernel_maps(void *arg, const char *name, char type, + u64 start) +{ + struct extra_kernel_map_info *mi = arg; + + if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL && + !strcmp(name, "_entry_trampoline")) { + mi->entry_trampoline = start; + return 0; + } + + if (is_entry_trampoline(name)) { + u64 end = start + page_size; + + return add_extra_kernel_map(mi, start, end, 0, name); + } + + return 0; +} + +int machine__create_extra_kernel_maps(struct machine *machine, + struct dso *kernel) +{ + struct extra_kernel_map_info mi = { .cnt = 0, }; + char filename[PATH_MAX]; + int ret; + int i; + + machine__get_kallsyms_filename(machine, filename, PATH_MAX); + + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return 0; + + ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps); + if (ret) + goto out_free; + + if (!mi.entry_trampoline) + goto out_free; + + for (i = 0; i < mi.cnt; i++) { + struct extra_kernel_map *xm = &mi.maps[i]; + + xm->pgoff = mi.entry_trampoline; + ret = machine__create_extra_kernel_map(machine, kernel, xm); + if (ret) + goto out_free; + } + + machine->trampolines_mapped = mi.cnt; +out_free: + free(mi.maps); + return ret; +} + +#endif diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 4b2caf6d48e7..fead6b3b4206 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -226,7 +226,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) else if (rm[2].rm_so != rm[2].rm_eo) prefix[0] = '+'; else - strncpy(prefix, "+0", 2); + scnprintf(prefix, sizeof(prefix), "+0"); } /* Rename register */ diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 63eb49082774..44195514b19e 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1098,7 +1098,7 @@ static void *worker_thread(void *__tdata) u8 *global_data; u8 *process_data; u8 *thread_data; - u64 bytes_done; + u64 bytes_done, secs; long work_done; u32 l; struct rusage rusage; @@ -1254,7 +1254,8 @@ static void *worker_thread(void *__tdata) timersub(&stop, &start0, &diff); td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; - td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; + secs = td->runtime_ns / NSEC_PER_SEC; + td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0; getrusage(RUSAGE_THREAD, &rusage); td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 51709a961496..8180319285af 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -40,11 +40,11 @@ struct perf_annotate { struct perf_tool tool; struct perf_session *session; + struct annotation_options opts; bool use_tui, use_stdio, use_stdio2, use_gtk; - bool full_paths; - bool print_line; bool skip_missing; bool has_br_stack; + bool group_set; const char *sym_hist_filter; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -161,12 +161,12 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, hist__account_cycles(sample->branch_stack, al, sample, false); bi = he->branch_info; - err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; - err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); out: return err; @@ -228,7 +228,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, */ if (al->sym != NULL) { rb_erase(&al->sym->rb_node, - &al->map->dso->symbols[al->map->type]); + &al->map->dso->symbols); symbol__delete(al->sym); dso__reset_find_symbol_cache(al->map->dso); } @@ -248,7 +248,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, if (he == NULL) return -ENOMEM; - ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); + ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); hists__inc_nr_samples(hists, true); return ret; } @@ -283,15 +283,23 @@ out_put: return ret; } +static int process_feature_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(tool, event, session); + return 0; +} + static int hist_entry__tty_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct perf_annotate *ann) { if (!ann->use_stdio2) - return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, - ann->print_line, ann->full_paths, 0, 0); - return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, - ann->print_line, ann->full_paths); + return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts); + + return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts); } static void hists__find_annotations(struct hists *hists, @@ -342,7 +350,7 @@ find_next: /* skip missing symbols */ nd = rb_next(nd); } else if (use_browser == 1) { - key = hist_entry__tui_annotate(he, evsel, NULL); + key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts); switch (key) { case -1: @@ -389,8 +397,9 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - if (!objdump_path) { - ret = perf_env__lookup_objdump(&session->header.env); + if (!ann->opts.objdump_path) { + ret = perf_env__lookup_objdump(&session->header.env, + &ann->opts.objdump_path); if (ret) goto out; } @@ -471,10 +480,11 @@ int cmd_annotate(int argc, const char **argv) .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, .tracing_data = perf_event__process_tracing_data, - .feature = perf_event__process_feature, + .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, }, + .opts = annotation__default_options, }; struct perf_data data = { .mode = PERF_DATA_MODE_READ, @@ -502,23 +512,26 @@ int cmd_annotate(int argc, const char **argv) "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), - OPT_BOOLEAN('l', "print-line", &annotate.print_line, + OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines, "print matching source lines (may be slow)"), - OPT_BOOLEAN('P', "full-paths", &annotate.full_paths, + OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path, "Don't shorten the displayed pathnames"), OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, "Skip symbols that cannot be annotated"), + OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, + &annotate.group_set, + "Show event group information together"), OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), OPT_CALLBACK(0, "symfs", NULL, "directory", "Look for files with symbols relative to this directory", symbol__config_symfs), - OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), - OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "objdump", &objdump_path, "path", + OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, "Show event group information together"), @@ -570,6 +583,9 @@ int cmd_annotate(int argc, const char **argv) annotate.has_br_stack = perf_header__has_feat(&annotate.session->header, HEADER_BRANCH_STACK); + if (annotate.group_set) + perf_evlist__force_leader(annotate.session->evlist); + ret = symbol__annotation_init(); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 41db2cba77eb..115110a4796a 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -25,6 +25,7 @@ #include "util/session.h" #include "util/symbol.h" #include "util/time-utils.h" +#include "util/probe-file.h" static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { @@ -239,6 +240,34 @@ out: return err; } +static int build_id_cache__purge_all(void) +{ + struct strlist *list; + struct str_node *pos; + int err = 0; + char *buf; + + list = build_id_cache__list_all(false); + if (!list) { + pr_debug("Failed to get buildids: -%d\n", errno); + return -EINVAL; + } + + strlist__for_each_entry(pos, list) { + buf = build_id_cache__origname(pos->s); + err = build_id_cache__remove_s(pos->s); + pr_debug("Removing %s (%s): %s\n", buf, pos->s, + err ? "FAIL" : "Ok"); + free(buf); + if (err) + break; + } + strlist__delete(list); + + pr_debug("Purged all: %s\n", err ? "FAIL" : "Ok"); + return err; +} + static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) { char filename[PATH_MAX]; @@ -297,6 +326,26 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) return err; } +static int build_id_cache__show_all(void) +{ + struct strlist *bidlist; + struct str_node *nd; + char *buf; + + bidlist = build_id_cache__list_all(true); + if (!bidlist) { + pr_debug("Failed to get buildids: -%d\n", errno); + return -1; + } + strlist__for_each_entry(nd, bidlist) { + buf = build_id_cache__origname(nd->s); + fprintf(stdout, "%s %s\n", nd->s, buf); + free(buf); + } + strlist__delete(bidlist); + return 0; +} + int cmd_buildid_cache(int argc, const char **argv) { struct strlist *list; @@ -304,6 +353,9 @@ int cmd_buildid_cache(int argc, const char **argv) int ret = 0; int ns_id = -1; bool force = false; + bool list_files = false; + bool opts_flag = false; + bool purge_all = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, *purge_name_list_str = NULL, @@ -327,6 +379,8 @@ int cmd_buildid_cache(int argc, const char **argv) "file(s) to remove"), OPT_STRING('p', "purge", &purge_name_list_str, "file list", "file(s) to remove (remove old caches too)"), + OPT_BOOLEAN('P', "purge-all", &purge_all, "purge all cached files"), + OPT_BOOLEAN('l', "list", &list_files, "list all cached files"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -344,11 +398,20 @@ int cmd_buildid_cache(int argc, const char **argv) argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); - if (argc || (!add_name_list_str && !kcore_filename && - !remove_name_list_str && !purge_name_list_str && - !missing_filename && !update_name_list_str)) + opts_flag = add_name_list_str || kcore_filename || + remove_name_list_str || purge_name_list_str || + missing_filename || update_name_list_str || + purge_all; + + if (argc || !(list_files || opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); + /* -l is exclusive. It can not be used with other options. */ + if (list_files && opts_flag) { + usage_with_options_msg(buildid_cache_usage, + buildid_cache_options, "-l is exclusive.\n"); + } + if (ns_id > 0) nsi = nsinfo__new(ns_id); @@ -366,6 +429,11 @@ int cmd_buildid_cache(int argc, const char **argv) setup_pager(); + if (list_files) { + ret = build_id_cache__show_all(); + goto out; + } + if (add_name_list_str) { list = strlist__new(add_name_list_str, NULL); if (list) { @@ -420,6 +488,13 @@ int cmd_buildid_cache(int argc, const char **argv) } } + if (purge_all) { + if (build_id_cache__purge_all()) { + pr_warning("Couldn't remove some caches. Error: %s.\n", + str_error_r(errno, sbuf, sizeof(sbuf))); + } + } + if (missing_filename) ret = build_id_cache__fprintf_missing(session, stdout); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2126bfbcb385..6a8738f7ead3 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -56,16 +56,16 @@ struct c2c_hist_entry { struct compute_stats cstats; + unsigned long paddr; + unsigned long paddr_cnt; + bool paddr_zero; + char *nodestr; + /* * must be at the end, * because of its callchain dynamic entry */ struct hist_entry he; - - unsigned long paddr; - unsigned long paddr_cnt; - bool paddr_zero; - char *nodestr; }; static char const *coalesce_default = "pid,iaddr"; @@ -1976,7 +1976,7 @@ static int filter_cb(struct hist_entry *he) c2c_he = container_of(he, struct c2c_hist_entry, he); if (c2c.show_src && !he->srcline) - he->srcline = hist_entry__get_srcline(he); + he->srcline = hist_entry__srcline(he); calc_width(c2c_he); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 40fe919bbcf3..a3b346359ba0 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -440,9 +440,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, goto repipe; } - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - - if (al.map != NULL) { + if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { if (!al.map->dso->hit) { al.map->dso->hit = 1; if (map__load(al.map) >= 0) { diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index bcfb363112d3..90d1a2305b72 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -27,7 +27,7 @@ static int __cmd_kallsyms(int argc, const char **argv) for (i = 0; i < argc; ++i) { struct map *map; - struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map); + struct symbol *symbol = machine__find_kernel_symbol_by_name(machine, argv[i], &map); if (symbol == NULL) { printf("%s: not found\n", argv[i]); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ae11e4c3516a..54d3f21b0e62 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1004,7 +1004,7 @@ static void __print_slab_result(struct rb_root *root, if (is_caller) { addr = data->call_site; if (!raw_ip) - sym = machine__find_kernel_function(machine, addr, &map); + sym = machine__find_kernel_symbol(machine, addr, &map); } else addr = data->ptr; @@ -1068,7 +1068,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines) char *caller = buf; data = rb_entry(next, struct page_stat, node); - sym = machine__find_kernel_function(machine, data->callsite, &map); + sym = machine__find_kernel_symbol(machine, data->callsite, &map); if (sym) caller = sym->name; else @@ -1110,7 +1110,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines char *caller = buf; data = rb_entry(next, struct page_stat, node); - sym = machine__find_kernel_function(machine, data->callsite, &map); + sym = machine__find_kernel_symbol(machine, data->callsite, &map); if (sym) caller = sym->name; else diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 72e2ca096bf5..2b1ef704169f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1438,8 +1438,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, goto out; } - symbol_conf.nr_events = kvm->evlist->nr_entries; - if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) usage_with_options(live_usage, live_options); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index c0065923a525..99de91698de1 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -81,8 +81,7 @@ static int parse_probe_event(const char *str) params.target_used = true; } - if (params.nsi) - pev->nsi = nsinfo__get(params.nsi); + pev->nsi = nsinfo__get(params.nsi); /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0f198f6d9b77..c04dc7b53797 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -71,6 +71,7 @@ struct report { bool group_set; int max_stack; struct perf_read_values show_threads_values; + struct annotation_options annotation_opts; const char *pretty_printing_style; const char *cpu_list; const char *symbol_filter_str; @@ -136,26 +137,25 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; - err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; - err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); } else if (rep->mem_mode) { mi = he->mem_info; - err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel); if (err) goto out; - err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); } else if (symbol_conf.cumulate_callchain) { if (single) - err = hist_entry__inc_addr_samples(he, sample, evsel->idx, - al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); } else { - err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); } out: @@ -181,11 +181,11 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, rep->nonany_branch_mode); bi = he->branch_info; - err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; - err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); branch_type_count(&rep->brtype_stat, &bi->flags, bi->from.addr, bi->to.addr); @@ -194,20 +194,11 @@ out: return err; } -/* - * Events in data file are not collect in groups, but we still want - * the group display. Set the artificial group and set the leader's - * forced_leader flag to notify the display code. - */ static void setup_forced_leader(struct report *report, struct perf_evlist *evlist) { - if (report->group_set && !evlist->nr_groups) { - struct perf_evsel *leader = perf_evlist__first(evlist); - - perf_evlist__set_leader(evlist); - leader->forced_leader = true; - } + if (report->group_set) + perf_evlist__force_leader(evlist); } static int process_feature_event(struct perf_tool *tool, @@ -226,7 +217,8 @@ static int process_feature_event(struct perf_tool *tool, } /* - * All features are received, we can force the + * (feat_id = HEADER_LAST_FEATURE) is the end marker which + * means all features are received, now we can force the * group if needed. */ setup_forced_leader(rep, session->evlist); @@ -523,12 +515,9 @@ static void report__warn_kptr_restrict(const struct report *rep) "As no suitable kallsyms nor vmlinux was found, kernel samples\n" "can't be resolved."; - if (kernel_map) { - const struct dso *kdso = kernel_map->dso; - if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) { - desc = "If some relocation was applied (e.g. " - "kexec) symbols may be misresolved."; - } + if (kernel_map && map__has_symbols(kernel_map)) { + desc = "If some relocation was applied (e.g. " + "kexec) symbols may be misresolved."; } ui__warning( @@ -573,7 +562,7 @@ static int report__browse_hists(struct report *rep) ret = perf_evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, &session->header.env, - true); + true, &rep->annotation_opts); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -718,10 +707,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) { - int printed = 0, i; - for (i = 0; i < MAP__NR_TYPES; ++i) - printed += maps__fprintf_task(&mg->maps[i], indent, fp); - return printed; + return maps__fprintf_task(&mg->maps, indent, fp); } static void task__print_level(struct task *task, FILE *fp, int level) @@ -961,12 +947,6 @@ parse_percent_limit(const struct option *opt, const char *str, return 0; } -#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" - -const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" - CALLCHAIN_REPORT_HELP - "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; - int cmd_report(int argc, const char **argv) { struct perf_session *session; @@ -975,6 +955,10 @@ int cmd_report(int argc, const char **argv) bool has_br_stack = false; int branch_mode = -1; bool branch_call_mode = false; +#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" + const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" + CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char * const report_usage[] = { "perf report [<options>]", @@ -1004,6 +988,7 @@ int cmd_report(int argc, const char **argv) .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", .socket_filter = -1, + .annotation_opts = annotation__default_options, }; const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", @@ -1093,11 +1078,11 @@ int cmd_report(int argc, const char **argv) "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, "Display extended information about perf.data file"), - OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), - OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), @@ -1108,7 +1093,7 @@ int cmd_report(int argc, const char **argv) parse_branch_mode), OPT_BOOLEAN(0, "branch-history", &branch_call_mode, "add last branch records to call history"), - OPT_STRING(0, "objdump", &objdump_path, "path", + OPT_STRING(0, "objdump", &report.annotation_opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, "Disable symbol demangling"), diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 4dfdee668b0c..cbf39dab19c1 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2143,7 +2143,7 @@ static void save_task_callchain(struct perf_sched *sched, return; } - if (!symbol_conf.use_callchain || sample->callchain == NULL) + if (!sched->show_callchain || sample->callchain == NULL) return; if (thread__resolve_callchain(thread, cursor, evsel, sample, @@ -2271,10 +2271,11 @@ static struct thread *get_idle_thread(int cpu) return idle_threads[cpu]; } -static void save_idle_callchain(struct idle_thread_runtime *itr, +static void save_idle_callchain(struct perf_sched *sched, + struct idle_thread_runtime *itr, struct perf_sample *sample) { - if (!symbol_conf.use_callchain || sample->callchain == NULL) + if (!sched->show_callchain || sample->callchain == NULL) return; callchain_cursor__copy(&itr->cursor, &callchain_cursor); @@ -2320,7 +2321,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, /* copy task callchain when entering to idle */ if (perf_evsel__intval(evsel, sample, "next_pid") == 0) - save_idle_callchain(itr, sample); + save_idle_callchain(sched, itr, sample); } } @@ -2849,7 +2850,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" CPU %2d idle entire time window\n", i); } - if (sched->idle_hist && symbol_conf.use_callchain) { + if (sched->idle_hist && sched->show_callchain) { callchain_param.mode = CHAIN_FOLDED; callchain_param.value = CCVAL_PERIOD; @@ -2933,8 +2934,7 @@ static int timehist_check_attr(struct perf_sched *sched, return -1; } - if (sched->show_callchain && - !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (sched->show_callchain && !evsel__has_callchain(evsel)) { pr_info("Samples do not have callchains.\n"); sched->show_callchain = 0; symbol_conf.use_callchain = 0; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e0a9845b6cbc..568ddfac3213 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -153,8 +153,8 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -165,8 +165,9 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | + PERF_OUTPUT_BPF_OUTPUT, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -179,16 +180,28 @@ static struct { PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE }, + [PERF_TYPE_HW_CACHE] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, + [PERF_TYPE_RAW] = { .user_set = false, .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | - PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | - PERF_OUTPUT_PHYS_ADDR, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | + PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | + PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -199,8 +212,8 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -211,8 +224,8 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_SYNTH, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_SYNTH, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -516,7 +529,7 @@ static int perf_session__check_output_opt(struct perf_session *session) evlist__for_each_entry(session->evlist, evsel) { not_pipe = true; - if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + if (evsel__has_callchain(evsel)) { use_callchain = true; break; } @@ -531,21 +544,18 @@ static int perf_session__check_output_opt(struct perf_session *session) */ if (symbol_conf.use_callchain && !output[PERF_TYPE_TRACEPOINT].user_set) { - struct perf_event_attr *attr; - j = PERF_TYPE_TRACEPOINT; evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.type != j) continue; - attr = &evsel->attr; - - if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { + if (evsel__has_callchain(evsel)) { output[j].fields |= PERF_OUTPUT_IP; output[j].fields |= PERF_OUTPUT_SYM; + output[j].fields |= PERF_OUTPUT_SYMOFFSET; output[j].fields |= PERF_OUTPUT_DSO; - set_print_ip_opts(attr); + set_print_ip_opts(&evsel->attr); goto out; } } @@ -608,7 +618,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, if (PRINT_FIELD(COMM)) { if (latency_format) printed += fprintf(fp, "%8.8s ", thread__comm_str(thread)); - else if (PRINT_FIELD(IP) && symbol_conf.use_callchain) + else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain) printed += fprintf(fp, "%s ", thread__comm_str(thread)); else printed += fprintf(fp, "%16s ", thread__comm_str(thread)); @@ -717,8 +727,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, if (PRINT_FIELD(DSO)) { memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + thread__find_map(thread, sample->cpumode, from, &alf); + thread__find_map(thread, sample->cpumode, to, &alt); } printed += fprintf(fp, " 0x%"PRIx64, from); @@ -764,13 +774,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, from = br->entries[i].from; to = br->entries[i].to; - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); - if (alf.map) - alf.sym = map__find_symbol(alf.map, alf.addr); - - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); - if (alt.map) - alt.sym = map__find_symbol(alt.map, alt.addr); + thread__find_symbol(thread, sample->cpumode, from, &alf); + thread__find_symbol(thread, sample->cpumode, to, &alt); printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp); if (PRINT_FIELD(DSO)) { @@ -814,12 +819,12 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, from = br->entries[i].from; to = br->entries[i].to; - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); - if (alf.map && !alf.map->dso->adjust_symbols) + if (thread__find_map(thread, sample->cpumode, from, &alf) && + !alf.map->dso->adjust_symbols) from = map__map_ip(alf.map, from); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); - if (alt.map && !alt.map->dso->adjust_symbols) + if (thread__find_map(thread, sample->cpumode, to, &alt) && + !alt.map->dso->adjust_symbols) to = map__map_ip(alt.map, to); printed += fprintf(fp, " 0x%"PRIx64, from); @@ -882,8 +887,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, return 0; } - thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); - if (!al.map || !al.map->dso) { + if (!thread__find_map(thread, *cpumode, start, &al) || !al.map->dso) { pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); return 0; } @@ -933,10 +937,8 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread, memset(&al, 0, sizeof(al)); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); - if (!al.map) - thread__find_addr_map(thread, cpumode, MAP__VARIABLE, - addr, &al); + thread__find_map(thread, cpumode, addr, &al); + if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) return 0; @@ -1832,6 +1834,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist *evlist; struct perf_evsel *evsel, *pos; int err; + static struct perf_evsel_script *es; err = perf_event__process_attr(tool, event, pevlist); if (err) @@ -1840,6 +1843,19 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, evlist = *pevlist; evsel = perf_evlist__last(*pevlist); + if (!evsel->priv) { + if (scr->per_event_dump) { + evsel->priv = perf_evsel_script__new(evsel, + scr->session->data); + } else { + es = zalloc(sizeof(*es)); + if (!es) + return -ENOMEM; + es->fp = stdout; + evsel->priv = es; + } + } + if (evsel->attr.type >= PERF_TYPE_MAX && evsel->attr.type != PERF_TYPE_SYNTH) return 0; @@ -3028,6 +3044,15 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } +static int process_feature_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(tool, event, session); + return 0; +} + #ifdef HAVE_AUXTRACE_SUPPORT static int perf_script__process_auxtrace_info(struct perf_tool *tool, union perf_event *event, @@ -3072,7 +3097,7 @@ int cmd_script(int argc, const char **argv) .attr = process_attr, .event_update = perf_event__process_event_update, .tracing_data = perf_event__process_tracing_data, - .feature = perf_event__process_feature, + .feature = process_feature_event, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_script__process_auxtrace_info, @@ -3123,8 +3148,9 @@ int cmd_script(int argc, const char **argv) "+field to add and -field to remove." "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", + "addr,symoff,srcline,period,iregs,uregs,brstack," + "brstacksym,flags,bpf-output,brstackinsn,brstackoff," + "callindent,insn,insnlen,synth,phys_addr,metric,misc", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f17dc601b0f3..05be023c3f0e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,7 @@ #include "util/tool.h" #include "util/string2.h" #include "util/metricgroup.h" +#include "util/top.h" #include "asm/bug.h" #include <linux/time64.h> @@ -80,6 +81,9 @@ #include <sys/stat.h> #include <sys/wait.h> #include <unistd.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> #include "sane_ctype.h" @@ -141,6 +145,8 @@ static struct target target = { typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); +#define METRIC_ONLY_LEN 20 + static int run_count = 1; static bool no_inherit = false; static volatile pid_t child_pid = -1; @@ -164,15 +170,21 @@ static bool forever = false; static bool metric_only = false; static bool force_metric_only = false; static bool no_merge = false; +static bool walltime_run_table = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; static bool interval_count; +static bool interval_clear; static const char *output_name; static int output_fd; static int print_free_counters_hint; static int print_mixed_hw_group_error; +static u64 *walltime_run; +static bool ru_display = false; +static struct rusage ru_data; +static unsigned int metric_only_len = METRIC_ONLY_LEN; struct perf_stat { bool record; @@ -569,7 +581,7 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) return leader; } -static int __run_perf_stat(int argc, const char **argv) +static int __run_perf_stat(int argc, const char **argv, int run_idx) { int interval = stat_config.interval; int times = stat_config.times; @@ -724,7 +736,7 @@ try_again: break; } } - waitpid(child_pid, &status, 0); + wait4(child_pid, &status, 0, &ru_data); if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); @@ -752,6 +764,9 @@ try_again: t1 = rdclock(); + if (walltime_run_table) + walltime_run[run_idx] = t1 - t0; + update_stats(&walltime_nsecs_stats, t1 - t0); /* @@ -766,7 +781,7 @@ try_again: return WEXITSTATUS(status); } -static int run_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc, const char **argv, int run_idx) { int ret; @@ -779,7 +794,7 @@ static int run_perf_stat(int argc, const char **argv) if (sync_run) sync(); - ret = __run_perf_stat(argc, argv); + ret = __run_perf_stat(argc, argv, run_idx); if (ret) return ret; @@ -957,8 +972,6 @@ static void print_metric_csv(void *ctx, fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); } -#define METRIC_ONLY_LEN 20 - /* Filter out some columns that don't work well in metrics only mode */ static bool valid_only_metric(const char *unit) @@ -989,22 +1002,20 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt, { struct outstate *os = ctx; FILE *out = os->fh; - int n; - char buf[1024]; - unsigned mlen = METRIC_ONLY_LEN; + char buf[1024], str[1024]; + unsigned mlen = metric_only_len; if (!valid_only_metric(unit)) return; unit = fixunit(buf, os->evsel, unit); - if (color) - n = color_fprintf(out, color, fmt, val); - else - n = fprintf(out, fmt, val); - if (n > METRIC_ONLY_LEN) - n = METRIC_ONLY_LEN; if (mlen < strlen(unit)) mlen = strlen(unit) + 1; - fprintf(out, "%*s", mlen - n, ""); + + if (color) + mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; + + color_snprintf(str, sizeof(str), color ?: "", fmt, val); + fprintf(out, "%*s ", mlen, str); } static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, @@ -1044,7 +1055,7 @@ static void print_metric_header(void *ctx, const char *color __maybe_unused, if (csv_output) fprintf(os->fh, "%s%s", unit, csv_sep); else - fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); + fprintf(os->fh, "%*s ", metric_only_len, unit); } static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) @@ -1694,9 +1705,12 @@ static void print_interval(char *prefix, struct timespec *ts) FILE *output = stat_config.output; static int num_print_interval; + if (interval_clear) + puts(CONSOLE_CLEAR); + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output) { + if ((num_print_interval == 0 && !csv_output) || interval_clear) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus"); @@ -1709,7 +1723,7 @@ static void print_interval(char *prefix, struct timespec *ts) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU"); + fprintf(output, "# time CPU "); if (!metric_only) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; @@ -1728,7 +1742,7 @@ static void print_interval(char *prefix, struct timespec *ts) } } - if (num_print_interval == 0 && metric_only) + if ((num_print_interval == 0 || interval_clear) && metric_only) print_metric_headers(" ", true); if (++num_print_interval == 25) num_print_interval = 0; @@ -1764,19 +1778,81 @@ static void print_header(int argc, const char **argv) } } +static int get_precision(double num) +{ + if (num > 1) + return 0; + + return lround(ceil(-log10(num))); +} + +static void print_table(FILE *output, int precision, double avg) +{ + char tmp[64]; + int idx, indent = 0; + + scnprintf(tmp, 64, " %17.*f", precision, avg); + while (tmp[indent] == ' ') + indent++; + + fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); + + for (idx = 0; idx < run_count; idx++) { + double run = (double) walltime_run[idx] / NSEC_PER_SEC; + int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); + + fprintf(output, " %17.*f (%+.*f) ", + precision, run, precision, run - avg); + + for (h = 0; h < n; h++) + fprintf(output, "#"); + + fprintf(output, "\n"); + } + + fprintf(output, "\n%*s# Final result:\n", indent, ""); +} + +static double timeval2double(struct timeval *t) +{ + return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; +} + static void print_footer(void) { + double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = stat_config.output; int n; if (!null_run) fprintf(output, "\n"); - fprintf(output, " %17.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); - if (run_count > 1) { - fprintf(output, " "); - print_noise_pct(stddev_stats(&walltime_nsecs_stats), - avg_stats(&walltime_nsecs_stats)); + + if (run_count == 1) { + fprintf(output, " %17.9f seconds time elapsed", avg); + + if (ru_display) { + double ru_utime = timeval2double(&ru_data.ru_utime); + double ru_stime = timeval2double(&ru_data.ru_stime); + + fprintf(output, "\n\n"); + fprintf(output, " %17.9f seconds user\n", ru_utime); + fprintf(output, " %17.9f seconds sys\n", ru_stime); + } + } else { + double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; + /* + * Display at most 2 more significant + * digits than the stddev inaccuracy. + */ + int precision = get_precision(sd) + 2; + + if (walltime_run_table) + print_table(output, precision, avg); + + fprintf(output, " %17.*f +- %.*f seconds time elapsed", + precision, avg, precision, sd); + + print_noise_pct(sd, avg); } fprintf(output, "\n\n"); @@ -1952,6 +2028,8 @@ static const struct option stat_options[] = { "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, "repeat command and print average + stddev (max: 100, forever: 0)"), + OPT_BOOLEAN(0, "table", &walltime_run_table, + "display details about each run (only with -r option)"), OPT_BOOLEAN('n', "null", &null_run, "null run - dont start any counters"), OPT_INCR('d', "detailed", &detailed_run, @@ -1983,6 +2061,8 @@ static const struct option stat_options[] = { "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), + OPT_BOOLEAN(0, "interval-clear", &interval_clear, + "clear screen in between new interval"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, @@ -2362,14 +2442,13 @@ static int add_default_attributes(void) (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; + struct parse_events_error errinfo; /* Set attrs if no event is selected and !null_run: */ if (null_run) return 0; if (transaction_run) { - struct parse_events_error errinfo; - if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, @@ -2380,6 +2459,7 @@ static int add_default_attributes(void) &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); + parse_events_print_error(&errinfo, transaction_attrs); return -1; } return 0; @@ -2405,10 +2485,11 @@ static int add_default_attributes(void) pmu_have_event("msr", "smi")) { if (!force_metric_only) metric_only = true; - err = parse_events(evsel_list, smi_cost_attrs, NULL); + err = parse_events(evsel_list, smi_cost_attrs, &errinfo); } else { fprintf(stderr, "To measure SMI cost, it needs " "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); + parse_events_print_error(&errinfo, smi_cost_attrs); return -1; } if (err) { @@ -2443,12 +2524,13 @@ static int add_default_attributes(void) if (topdown_attrs[0] && str) { if (warn) arch_topdown_group_warn(); - err = parse_events(evsel_list, str, NULL); + err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); free(str); + parse_events_print_error(&errinfo, str); return -1; } } else { @@ -2843,6 +2925,13 @@ int cmd_stat(int argc, const char **argv) goto out; } + if (walltime_run_table && run_count <= 1) { + fprintf(stderr, "--table is only supported with -r\n"); + parse_options_usage(stat_usage, stat_options, "r", 1); + parse_options_usage(NULL, stat_options, "table", 0); + goto out; + } + if (output_fd < 0) { fprintf(stderr, "argument to --log-fd must be a > 0\n"); parse_options_usage(stat_usage, stat_options, "log-fd", 0); @@ -2888,6 +2977,13 @@ int cmd_stat(int argc, const char **argv) setup_system_wide(argc); + /* + * Display user/system times only for single + * run and when there's specified tracee. + */ + if ((run_count == 1) && target__none(&target)) + ru_display = true; + if (run_count < 0) { pr_err("Run count must be a positive number\n"); parse_options_usage(stat_usage, stat_options, "r", 1); @@ -2897,6 +2993,14 @@ int cmd_stat(int argc, const char **argv) run_count = 1; } + if (walltime_run_table) { + walltime_run = zalloc(run_count * sizeof(walltime_run[0])); + if (!walltime_run) { + pr_err("failed to setup -r option"); + goto out; + } + } + if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { if (!target.system_wide || target.cpu_list) { @@ -3012,7 +3116,7 @@ int cmd_stat(int argc, const char **argv) fprintf(output, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); - status = run_perf_stat(argc, argv); + status = run_perf_stat(argc, argv, run_idx); if (forever && status != -1) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); @@ -3060,6 +3164,8 @@ int cmd_stat(int argc, const char **argv) perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: + free(walltime_run); + if (smi_cost && smi_reset) sysfs__write_int(FREEZE_ON_SMI_PATH, 0); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 813698a9b8c7..a827919c6263 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -533,12 +533,8 @@ static const char *cat_backtrace(union perf_event *event, } tal.filtered = 0; - thread__find_addr_location(al.thread, cpumode, - MAP__FUNCTION, ip, &tal); - - if (tal.sym) - fprintf(f, "..... %016" PRIx64 " %s\n", ip, - tal.sym->name); + if (thread__find_symbol(al.thread, cpumode, ip, &tal)) + fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name); else fprintf(f, "..... %016" PRIx64 "\n", ip); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f39bd60d2708..ffdc2769ff9f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -123,14 +123,9 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) } notes = symbol__annotation(sym); - if (notes->src != NULL) { - pthread_mutex_lock(¬es->lock); - goto out_assign; - } - pthread_mutex_lock(¬es->lock); - if (symbol__alloc_hist(sym) < 0) { + if (!symbol__hists(sym, top->evlist->nr_entries)) { pthread_mutex_unlock(¬es->lock); pr_err("Not enough memory for annotating '%s' symbol!\n", sym->name); @@ -138,9 +133,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, evsel, 0, NULL); + err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL); if (err == 0) { -out_assign: top->sym_filter_entry = he; } else { char msg[BUFSIZ]; @@ -188,7 +182,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) static void perf_top__record_precise_ip(struct perf_top *top, struct hist_entry *he, struct perf_sample *sample, - int counter, u64 ip) + struct perf_evsel *evsel, u64 ip) { struct annotation *notes; struct symbol *sym = he->ms.sym; @@ -204,7 +198,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, if (pthread_mutex_trylock(¬es->lock)) return; - err = hist_entry__inc_addr_samples(he, sample, counter, ip); + err = hist_entry__inc_addr_samples(he, sample, evsel, ip); pthread_mutex_unlock(¬es->lock); @@ -249,10 +243,9 @@ static void perf_top__show_details(struct perf_top *top) goto out_unlock; printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name); - printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter); + printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); - more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, - 0, top->sym_pcnt_filter, top->print_entries, 4); + more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts); if (top->evlist->enabled) { if (top->zero) @@ -412,7 +405,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter); + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -515,7 +508,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&top->count_filter, "Enter display event count filter"); break; case 'F': - prompt_percent(&top->sym_pcnt_filter, + prompt_percent(&top->annotation_opts.min_pcnt, "Enter details display event filter (percent)"); break; case 'K': @@ -613,7 +606,8 @@ static void *display_thread_tui(void *arg) perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, &top->session->header.env, - !top->record_opts.overwrite); + !top->record_opts.overwrite, + &top->annotation_opts); done = 1; return NULL; @@ -691,7 +685,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, struct perf_evsel *evsel = iter->evsel; if (perf_hpp_list.sym && single) - perf_top__record_precise_ip(top, he, iter->sample, evsel->idx, al->addr); + perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr); hist__account_cycles(iter->sample->branch_stack, al, iter->sample, !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); @@ -742,7 +736,7 @@ static void perf_event__process_sample(struct perf_tool *tool, "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", - al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? + al.map && map__has_symbols(al.map) ? " modules" : ""); if (use_browser <= 0) sleep(5); @@ -750,7 +744,7 @@ static void perf_event__process_sample(struct perf_tool *tool, machine->kptr_restrict_warned = true; } - if (al.sym == NULL) { + if (al.sym == NULL && al.map != NULL) { const char *msg = "Kernel samples will not be resolved.\n"; /* * As we do lazy loading of symtabs we only will know if the @@ -764,8 +758,7 @@ static void perf_event__process_sample(struct perf_tool *tool, * invalid --vmlinux ;-) */ if (!machine->kptr_restrict_warned && !top->vmlinux_warned && - al.map == machine->vmlinux_maps[MAP__FUNCTION] && - RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { + __map__is_kernel(al.map) && map__has_symbols(al.map)) { if (symbol_conf.vmlinux_name) { char serr[256]; dso__strerror_load(al.map->dso, serr, sizeof(serr)); @@ -1084,8 +1077,9 @@ static int __cmd_top(struct perf_top *top) if (top->session == NULL) return -1; - if (!objdump_path) { - ret = perf_env__lookup_objdump(&top->session->header.env); + if (!top->annotation_opts.objdump_path) { + ret = perf_env__lookup_objdump(&top->session->header.env, + &top->annotation_opts.objdump_path); if (ret) goto out_delete; } @@ -1265,8 +1259,8 @@ int cmd_top(int argc, const char **argv) .proc_map_timeout = 500, .overwrite = 1, }, - .max_stack = sysctl_perf_event_max_stack, - .sym_pcnt_filter = 5, + .max_stack = sysctl__max_stack(), + .annotation_opts = annotation__default_options, .nr_threads_synthesize = UINT_MAX, }; struct record_opts *opts = &top.record_opts; @@ -1348,15 +1342,15 @@ int cmd_top(int argc, const char **argv) "only consider symbols in these comms"), OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), - OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), - OPT_STRING(0, "objdump", &objdump_path, "path", + OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), - OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", @@ -1392,6 +1386,9 @@ int cmd_top(int argc, const char **argv) if (status < 0) return status; + top.annotation_opts.min_pcnt = 5; + top.annotation_opts.context = 4; + top.evlist = perf_evlist__new(); if (top.evlist == NULL) return -ENOMEM; @@ -1469,8 +1466,6 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - symbol_conf.nr_events = top.evlist->nr_entries; - if (top.delay_secs < 1) top.delay_secs = 1; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3ad17ee89403..6a748eca2edb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2024,8 +2024,7 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) goto out; - thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, - sample->ip, &al); + thread__find_symbol(thread, sample->cpumode, sample->ip, &al); trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); @@ -2037,12 +2036,10 @@ static int trace__pgfault(struct trace *trace, fprintf(trace->output, "] => "); - thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, - sample->addr, &al); + thread__find_symbol(thread, sample->cpumode, sample->addr, &al); if (!al.map) { - thread__find_addr_location(thread, sample->cpumode, - MAP__FUNCTION, sample->addr, &al); + thread__find_symbol(thread, sample->cpumode, sample->addr, &al); if (al.map) map_type = 'x'; @@ -2494,7 +2491,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) * to override an explicitely set --max-stack global setting. */ evlist__for_each_entry(evlist, evsel) { - if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) && + if (evsel__has_callchain(evsel) && evsel->attr.sample_max_stack == 0) evsel->attr.sample_max_stack = trace->max_stack; } @@ -3165,7 +3162,7 @@ int cmd_trace(int argc, const char **argv) mmap_pages_user_set = false; if (trace.max_stack == UINT_MAX) { - trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; + trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack(); max_stack_user_set = false; } diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 9aff89bc7535..10f333e2e825 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -55,22 +55,26 @@ include/uapi/asm-generic/ioctls.h include/uapi/asm-generic/mman-common.h ' -check () { - file=$1 +check_2 () { + file1=$1 + file2=$2 shift - opts= - while [ -n "$*" ]; do - opts="$opts \"$1\"" - shift - done + shift - cmd="diff $opts ../$file ../../$file > /dev/null" + cmd="diff $* $file1 $file2 > /dev/null" - test -f ../../$file && + test -f $file2 && eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2 } +check () { + file=$1 + + shift + + check_2 ../$file ../../$file $* +} # Check if we have the kernel headers (tools/perf/../../include), else # we're probably on a detached tarball, so no point in trying to check @@ -83,7 +87,7 @@ for i in $HEADERS; do done # diff with extra ignore lines -check arch/x86/lib/memcpy_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -check arch/x86/lib/memset_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" -check include/uapi/linux/mman.h -I "^#include <\(uapi/\)*asm/mman.h>" +check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' +check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' +check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"' +check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c new file mode 100644 index 000000000000..b9c203219691 --- /dev/null +++ b/tools/perf/examples/bpf/5sec.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + Description: + + . Disable strace like syscall tracing (--no-syscalls), or try tracing + just some (-e *sleep). + + . Attach a filter function to a kernel function, returning when it should + be considered, i.e. appear on the output. + + . Run it system wide, so that any sleep of >= 5 seconds and < than 6 + seconds gets caught. + + . Ask for callgraphs using DWARF info, so that userspace can be unwound + + . While this is running, run something like "sleep 5s". + + . If we decide to add tv_nsec as well, then it becomes: + + int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec) + + I.e. add where it comes from (rqtp->tv_nsec) and where it will be + accessible in the function body (nsec) + + # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/ + 0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5 + hrtimer_nanosleep ([kernel.kallsyms]) + __x64_sys_nanosleep ([kernel.kallsyms]) + do_syscall_64 ([kernel.kallsyms]) + entry_SYSCALL_64 ([kernel.kallsyms]) + __GI___nanosleep (/usr/lib64/libc-2.26.so) + rpl_nanosleep (/usr/bin/sleep) + xnanosleep (/usr/bin/sleep) + main (/usr/bin/sleep) + __libc_start_main (/usr/lib64/libc-2.26.so) + _start (/usr/bin/sleep) + ^C# + + Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com> +*/ + +#include <bpf.h> + +int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec) +{ + return sec == 5; +} + +license(GPL); diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c new file mode 100644 index 000000000000..3776d26db9e7 --- /dev/null +++ b/tools/perf/examples/bpf/empty.c @@ -0,0 +1,3 @@ +#include <bpf.h> + +license(GPL); diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h new file mode 100644 index 000000000000..dd764ad5efdf --- /dev/null +++ b/tools/perf/include/bpf/bpf.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _PERF_BPF_H +#define _PERF_BPF_H +#define SEC(NAME) __attribute__((section(NAME), used)) + +#define probe(function, vars) \ + SEC(#function "=" #function " " #vars) function + +#define license(name) \ +char _license[] SEC("license") = #name; \ +int _version SEC("version") = LINUX_VERSION_CODE; + +#endif /* _PERF_BPF_H */ diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 0c6d1002b524..ac1bcdc17dae 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -35,6 +35,7 @@ #include <sys/mman.h> #include <syscall.h> /* for gettid() */ #include <err.h> +#include <linux/kernel.h> #include "jvmti_agent.h" #include "../util/jitdump.h" @@ -249,7 +250,7 @@ void *jvmti_open(void) /* * jitdump file name */ - snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 20a08cb32332..a11cb006f968 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -12,7 +12,6 @@ #include "util/env.h" #include <subcmd/exec-cmd.h> #include "util/config.h" -#include "util/quote.h" #include <subcmd/run-command.h> #include "util/parse-events.h" #include <subcmd/parse-options.h> @@ -238,7 +237,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argc)--; } else if (strstarts(cmd, CMD_DEBUGFS_DIR)) { tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR)); - fprintf(stderr, "dir: %s\n", tracing_path); + fprintf(stderr, "dir: %s\n", tracing_path_mount()); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--list-cmds")) { @@ -421,22 +420,11 @@ void pthread__unblock_sigwinch(void) pthread_sigmask(SIG_UNBLOCK, &set, NULL); } -#ifdef _SC_LEVEL1_DCACHE_LINESIZE -#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE) -#else -static void cache_line_size(int *cacheline_sizep) -{ - if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep)) - pr_debug("cannot determine cache line size"); -} -#endif - int main(int argc, const char **argv) { int err; const char *cmd; char sbuf[STRERR_BUFSIZE]; - int value; /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); @@ -444,13 +432,6 @@ int main(int argc, const char **argv) /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); - cache_line_size(&cacheline_size); - - if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) - sysctl_perf_event_max_stack = value; - - if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0) - sysctl_perf_event_max_contexts_per_stack = value; cmd = extract_argv0_path(argv[0]); if (!cmd) @@ -458,15 +439,11 @@ int main(int argc, const char **argv) srandom(time(NULL)); - perf_config__init(); err = perf_config(perf_default_config, NULL); if (err) return err; set_buildid_dir(NULL); - /* get debugfs/tracefs mount point from /proc/mounts */ - tracing_path_mount(); - /* * "perf-xxxx" is the same as "perf xxxx", but we obviously: * diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 17783913d330..215ba30b8534 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,7 +1,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o -CHOSTFLAGS_jevents.o = -I$(srctree)/tools/include +HOSTCFLAGS_jevents.o = -I$(srctree)/tools/include pmu-events-y += pmu-events.o JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py index 38dfb720fb6f..54ace2f6bc36 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py @@ -31,10 +31,8 @@ def flag_str(event_name, field_name, value): string = "" if flag_fields[event_name][field_name]: - print_delim = 0 - keys = flag_fields[event_name][field_name]['values'].keys() - keys.sort() - for idx in keys: + print_delim = 0 + for idx in sorted(flag_fields[event_name][field_name]['values']): if not value and not idx: string += flag_fields[event_name][field_name]['values'][idx] break @@ -51,14 +49,12 @@ def symbol_str(event_name, field_name, value): string = "" if symbolic_fields[event_name][field_name]: - keys = symbolic_fields[event_name][field_name]['values'].keys() - keys.sort() - for idx in keys: + for idx in sorted(symbolic_fields[event_name][field_name]['values']): if not value and not idx: - string = symbolic_fields[event_name][field_name]['values'][idx] + string = symbolic_fields[event_name][field_name]['values'][idx] break - if (value == idx): - string = symbolic_fields[event_name][field_name]['values'][idx] + if (value == idx): + string = symbolic_fields[event_name][field_name]['values'][idx] break return string @@ -74,19 +70,17 @@ def trace_flag_str(value): string = "" print_delim = 0 - keys = trace_flags.keys() - - for idx in keys: - if not value and not idx: - string += "NONE" - break - - if idx and (value & idx) == idx: - if print_delim: - string += " | "; - string += trace_flags[idx] - print_delim = 1 - value &= ~idx + for idx in trace_flags: + if not value and not idx: + string += "NONE" + break + + if idx and (value & idx) == idx: + if print_delim: + string += " | "; + string += trace_flags[idx] + print_delim = 1 + value &= ~idx return string diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py index 81a56cd2b3c1..21a7a1298094 100755 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -8,6 +8,7 @@ # PerfEvent is the base class for all perf event sample, PebsEvent # is a HW base Intel x86 PEBS event, and user could add more SW/HW # event classes based on requirements. +from __future__ import print_function import struct @@ -44,7 +45,8 @@ class PerfEvent(object): PerfEvent.event_num += 1 def show(self): - print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso) + print("PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % + (self.name, self.symbol, self.comm, self.dso)) # # Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py index fdd92f699055..cac7b2542ee8 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py @@ -11,7 +11,7 @@ try: import wx except ImportError: - raise ImportError, "You need to install the wxpython lib for this script" + raise ImportError("You need to install the wxpython lib for this script") class RootFrame(wx.Frame): diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index f6c84966e4f8..7384dcb628c4 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -5,6 +5,7 @@ # This software may be distributed under the terms of the GNU General # Public License ("GPL") version 2 as published by the Free Software # Foundation. +from __future__ import print_function import errno, os @@ -33,7 +34,7 @@ def nsecs_str(nsecs): return str def add_stats(dict, key, value): - if not dict.has_key(key): + if key not in dict: dict[key] = (value, value, value, 1) else: min, max, avg, count = dict[key] @@ -72,10 +73,10 @@ try: except: if not audit_package_warned: audit_package_warned = True - print "Install the audit-libs-python package to get syscall names.\n" \ - "For example:\n # apt-get install python-audit (Ubuntu)" \ - "\n # yum install audit-libs-python (Fedora)" \ - "\n etc.\n" + print("Install the audit-libs-python package to get syscall names.\n" + "For example:\n # apt-get install python-audit (Ubuntu)" + "\n # yum install audit-libs-python (Fedora)" + "\n etc.\n") def syscall_name(id): try: diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-record b/tools/perf/scripts/python/bin/powerpc-hcalls-record new file mode 100644 index 000000000000..b7402aa9147d --- /dev/null +++ b/tools/perf/scripts/python/bin/powerpc-hcalls-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -e "{powerpc:hcall_entry,powerpc:hcall_exit}" $@ diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-report b/tools/perf/scripts/python/bin/powerpc-hcalls-report new file mode 100644 index 000000000000..dd32ad7465f6 --- /dev/null +++ b/tools/perf/scripts/python/bin/powerpc-hcalls-report @@ -0,0 +1,2 @@ +#!/bin/bash +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/powerpc-hcalls.py diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py new file mode 100644 index 000000000000..00e0e7476e55 --- /dev/null +++ b/tools/perf/scripts/python/powerpc-hcalls.py @@ -0,0 +1,200 @@ +# SPDX-License-Identifier: GPL-2.0+ +# +# Copyright (C) 2018 Ravi Bangoria, IBM Corporation +# +# Hypervisor call statisics + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +# output: { +# opcode: { +# 'min': minimum time nsec +# 'max': maximum time nsec +# 'time': average time nsec +# 'cnt': counter +# } ... +# } +output = {} + +# d_enter: { +# cpu: { +# opcode: nsec +# } ... +# } +d_enter = {} + +hcall_table = { + 4: 'H_REMOVE', + 8: 'H_ENTER', + 12: 'H_READ', + 16: 'H_CLEAR_MOD', + 20: 'H_CLEAR_REF', + 24: 'H_PROTECT', + 28: 'H_GET_TCE', + 32: 'H_PUT_TCE', + 36: 'H_SET_SPRG0', + 40: 'H_SET_DABR', + 44: 'H_PAGE_INIT', + 48: 'H_SET_ASR', + 52: 'H_ASR_ON', + 56: 'H_ASR_OFF', + 60: 'H_LOGICAL_CI_LOAD', + 64: 'H_LOGICAL_CI_STORE', + 68: 'H_LOGICAL_CACHE_LOAD', + 72: 'H_LOGICAL_CACHE_STORE', + 76: 'H_LOGICAL_ICBI', + 80: 'H_LOGICAL_DCBF', + 84: 'H_GET_TERM_CHAR', + 88: 'H_PUT_TERM_CHAR', + 92: 'H_REAL_TO_LOGICAL', + 96: 'H_HYPERVISOR_DATA', + 100: 'H_EOI', + 104: 'H_CPPR', + 108: 'H_IPI', + 112: 'H_IPOLL', + 116: 'H_XIRR', + 120: 'H_MIGRATE_DMA', + 124: 'H_PERFMON', + 220: 'H_REGISTER_VPA', + 224: 'H_CEDE', + 228: 'H_CONFER', + 232: 'H_PROD', + 236: 'H_GET_PPP', + 240: 'H_SET_PPP', + 244: 'H_PURR', + 248: 'H_PIC', + 252: 'H_REG_CRQ', + 256: 'H_FREE_CRQ', + 260: 'H_VIO_SIGNAL', + 264: 'H_SEND_CRQ', + 272: 'H_COPY_RDMA', + 276: 'H_REGISTER_LOGICAL_LAN', + 280: 'H_FREE_LOGICAL_LAN', + 284: 'H_ADD_LOGICAL_LAN_BUFFER', + 288: 'H_SEND_LOGICAL_LAN', + 292: 'H_BULK_REMOVE', + 304: 'H_MULTICAST_CTRL', + 308: 'H_SET_XDABR', + 312: 'H_STUFF_TCE', + 316: 'H_PUT_TCE_INDIRECT', + 332: 'H_CHANGE_LOGICAL_LAN_MAC', + 336: 'H_VTERM_PARTNER_INFO', + 340: 'H_REGISTER_VTERM', + 344: 'H_FREE_VTERM', + 348: 'H_RESET_EVENTS', + 352: 'H_ALLOC_RESOURCE', + 356: 'H_FREE_RESOURCE', + 360: 'H_MODIFY_QP', + 364: 'H_QUERY_QP', + 368: 'H_REREGISTER_PMR', + 372: 'H_REGISTER_SMR', + 376: 'H_QUERY_MR', + 380: 'H_QUERY_MW', + 384: 'H_QUERY_HCA', + 388: 'H_QUERY_PORT', + 392: 'H_MODIFY_PORT', + 396: 'H_DEFINE_AQP1', + 400: 'H_GET_TRACE_BUFFER', + 404: 'H_DEFINE_AQP0', + 408: 'H_RESIZE_MR', + 412: 'H_ATTACH_MCQP', + 416: 'H_DETACH_MCQP', + 420: 'H_CREATE_RPT', + 424: 'H_REMOVE_RPT', + 428: 'H_REGISTER_RPAGES', + 432: 'H_DISABLE_AND_GETC', + 436: 'H_ERROR_DATA', + 440: 'H_GET_HCA_INFO', + 444: 'H_GET_PERF_COUNT', + 448: 'H_MANAGE_TRACE', + 468: 'H_FREE_LOGICAL_LAN_BUFFER', + 472: 'H_POLL_PENDING', + 484: 'H_QUERY_INT_STATE', + 580: 'H_ILLAN_ATTRIBUTES', + 592: 'H_MODIFY_HEA_QP', + 596: 'H_QUERY_HEA_QP', + 600: 'H_QUERY_HEA', + 604: 'H_QUERY_HEA_PORT', + 608: 'H_MODIFY_HEA_PORT', + 612: 'H_REG_BCMC', + 616: 'H_DEREG_BCMC', + 620: 'H_REGISTER_HEA_RPAGES', + 624: 'H_DISABLE_AND_GET_HEA', + 628: 'H_GET_HEA_INFO', + 632: 'H_ALLOC_HEA_RESOURCE', + 644: 'H_ADD_CONN', + 648: 'H_DEL_CONN', + 664: 'H_JOIN', + 676: 'H_VASI_STATE', + 688: 'H_ENABLE_CRQ', + 696: 'H_GET_EM_PARMS', + 720: 'H_SET_MPP', + 724: 'H_GET_MPP', + 748: 'H_HOME_NODE_ASSOCIATIVITY', + 756: 'H_BEST_ENERGY', + 764: 'H_XIRR_X', + 768: 'H_RANDOM', + 772: 'H_COP', + 788: 'H_GET_MPP_X', + 796: 'H_SET_MODE', + 61440: 'H_RTAS', +} + +def hcall_table_lookup(opcode): + if (hcall_table.has_key(opcode)): + return hcall_table[opcode] + else: + return opcode + +print_ptrn = '%-28s%10s%10s%10s%10s' + +def trace_end(): + print print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)') + print '-' * 68 + for opcode in output: + h_name = hcall_table_lookup(opcode) + time = output[opcode]['time'] + cnt = output[opcode]['cnt'] + min_t = output[opcode]['min'] + max_t = output[opcode]['max'] + + print print_ptrn % (h_name, cnt, min_t, max_t, time/cnt) + +def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain, + opcode, retval): + if (d_enter.has_key(cpu) and d_enter[cpu].has_key(opcode)): + diff = nsecs(sec, nsec) - d_enter[cpu][opcode] + + if (output.has_key(opcode)): + output[opcode]['time'] += diff + output[opcode]['cnt'] += 1 + if (output[opcode]['min'] > diff): + output[opcode]['min'] = diff + if (output[opcode]['max'] < diff): + output[opcode]['max'] = diff + else: + output[opcode] = { + 'time': diff, + 'cnt': 1, + 'min': diff, + 'max': diff, + } + + del d_enter[cpu][opcode] +# else: +# print "Can't find matching hcall_enter event. Ignoring sample" + +def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm, + callchain, opcode): + if (d_enter.has_key(cpu)): + d_enter[cpu][opcode] = nsecs(sec, nsec) + else: + d_enter[cpu] = {opcode: nsecs(sec, nsec)} diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py index de66cb3b72c9..3473e7f66081 100644 --- a/tools/perf/scripts/python/sched-migration.py +++ b/tools/perf/scripts/python/sched-migration.py @@ -9,13 +9,17 @@ # This software is distributed under the terms of the GNU General # Public License ("GPL") version 2 as published by the Free Software # Foundation. - +from __future__ import print_function import os import sys from collections import defaultdict -from UserList import UserList +try: + from UserList import UserList +except ImportError: + # Python 3: UserList moved to the collections package + from collections import UserList sys.path.append(os.environ['PERF_EXEC_PATH'] + \ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') @@ -300,7 +304,7 @@ class TimeSliceList(UserList): if i == -1: return - for i in xrange(i, len(self.data)): + for i in range(i, len(self.data)): timeslice = self.data[i] if timeslice.start > end: return @@ -336,8 +340,8 @@ class SchedEventProxy: on_cpu_task = self.current_tsk[headers.cpu] if on_cpu_task != -1 and on_cpu_task != prev_pid: - print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \ - (headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid) + print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \ + headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid) threads[prev_pid] = prev_comm threads[next_pid] = next_comm diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cac8f8889bc3..dd850a26d579 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -422,7 +422,7 @@ static const char *shell_test__description(char *description, size_t size, #define for_each_shell_test(dir, base, ent) \ while ((ent = readdir(dir)) != NULL) \ - if (!is_directory(base, ent)) + if (!is_directory(base, ent) && ent->d_name[0] != '.') static const char *shell_tests__dir(char *path, size_t size) { @@ -654,6 +654,15 @@ static int perf_test__list(int argc, const char **argv) continue; pr_info("%2d: %s\n", i, t->desc); + + if (t->subtest.get_nr) { + int subn = t->subtest.get_nr(); + int subi; + + for (subi = 0; subi < subn; subi++) + pr_info("%2d:%1d: %s\n", i, subi + 1, + t->subtest.get_desc(subi)); + } } perf_test__list_shell(argc, argv, i); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 99936352df4f..4892bd2dc33e 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -236,14 +236,13 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); - if (!al.map || !al.map->dso) { + if (!thread__find_map(thread, cpumode, addr, &al) || !al.map->dso) { if (cpumode == PERF_RECORD_MISC_HYPERVISOR) { pr_debug("Hypervisor address can not be resolved - skipping\n"); return 0; } - pr_debug("thread__find_addr_map failed\n"); + pr_debug("thread__find_map failed\n"); return -1; } @@ -561,6 +560,7 @@ static int do_test_code_reading(bool try_kcore) pid = getpid(); machine = machine__new_host(); + machine->env = &perf_env; ret = machine__create_kernel_maps(machine); if (ret < 0) { diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index f7c5b613d667..b889a28fd80b 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -131,20 +131,20 @@ struct machine *setup_fake_machine(struct machines *machines) goto out; /* emulate dso__load() */ - dso__set_loaded(dso, MAP__FUNCTION); + dso__set_loaded(dso); for (k = 0; k < fake_symbols[i].nr_syms; k++) { struct symbol *sym; struct fake_sym *fsym = &fake_symbols[i].syms[k]; sym = symbol__new(fsym->start, fsym->length, - STB_GLOBAL, fsym->name); + STB_GLOBAL, STT_FUNC, fsym->name); if (sym == NULL) { dso__put(dso); goto out; } - symbols__insert(&dso->symbols[MAP__FUNCTION], sym); + symbols__insert(&dso->symbols, sym); } dso__put(dso); diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c index 8e57d46109de..148dd31cc201 100644 --- a/tools/perf/tests/kmod-path.c +++ b/tools/perf/tests/kmod-path.c @@ -127,6 +127,22 @@ int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_un M("[vdso]", PERF_RECORD_MISC_KERNEL, false); M("[vdso]", PERF_RECORD_MISC_USER, false); + T("[vdso32]", true , true , false, false, "[vdso32]", NULL); + T("[vdso32]", false , true , false, false, NULL , NULL); + T("[vdso32]", true , false , false, false, "[vdso32]", NULL); + T("[vdso32]", false , false , false, false, NULL , NULL); + M("[vdso32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("[vdso32]", PERF_RECORD_MISC_KERNEL, false); + M("[vdso32]", PERF_RECORD_MISC_USER, false); + + T("[vdsox32]", true , true , false, false, "[vdsox32]", NULL); + T("[vdsox32]", false , true , false, false, NULL , NULL); + T("[vdsox32]", true , false , false, false, "[vdsox32]", NULL); + T("[vdsox32]", false , false , false, false, NULL , NULL); + M("[vdsox32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("[vdsox32]", PERF_RECORD_MISC_KERNEL, false); + M("[vdsox32]", PERF_RECORD_MISC_USER, false); + /* path alloc_name alloc_ext kmod comp name ext */ T("[vsyscall]", true , true , false, false, "[vsyscall]", NULL); T("[vsyscall]", false , true , false, false, NULL , NULL); diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 868d82b501f4..b1af2499a3c9 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -188,9 +188,8 @@ static int mmap_events(synth_cb synth) pr_debug("looking for map %p\n", td->map); - thread__find_addr_map(thread, - PERF_RECORD_MISC_USER, MAP__FUNCTION, - (unsigned long) (td->map + 1), &al); + thread__find_map(thread, PERF_RECORD_MISC_USER, + (unsigned long) (td->map + 1), &al); thread__put(thread); @@ -218,7 +217,7 @@ static int mmap_events(synth_cb synth) * perf_event__synthesize_threads (global) * * We test we can find all memory maps via: - * thread__find_addr_map + * thread__find_map * * by using all thread objects. */ diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 18b06444f230..61211918bfba 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -499,7 +499,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); - TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel)); TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); /* cpu/config=2,call-graph=no,time=0,period=2000/ */ @@ -512,7 +512,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); - TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel)); TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); return 0; @@ -1309,18 +1309,31 @@ static int test__checkevent_config_cache(struct perf_evlist *evlist) return 0; } +static bool test__intel_pt_valid(void) +{ + return !!perf_pmu__find("intel_pt"); +} + +static int test__intel_pt(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; DIR *events_dir; int cnt = 0; - events_dir = opendir(tracing_events_path); + events_dir = tracing_events__opendir(); TEST_ASSERT_VAL("Can't open events dir", events_dir); while ((events_ent = readdir(events_dir))) { - char sys_path[PATH_MAX]; + char *sys_path; struct dirent *sys_ent; DIR *sys_dir; @@ -1331,8 +1344,8 @@ static int count_tracepoints(void) || !strcmp(events_ent->d_name, "header_page")) continue; - scnprintf(sys_path, PATH_MAX, "%s/%s", - tracing_events_path, events_ent->d_name); + sys_path = get_events_file(events_ent->d_name); + TEST_ASSERT_VAL("Can't get sys path", sys_path); sys_dir = opendir(sys_path); TEST_ASSERT_VAL("Can't open sys dir", sys_dir); @@ -1348,6 +1361,7 @@ static int count_tracepoints(void) } closedir(sys_dir); + put_events_file(sys_path); } closedir(events_dir); @@ -1366,6 +1380,7 @@ struct evlist_test { const char *name; __u32 type; const int id; + bool (*valid)(void); int (*check)(struct perf_evlist *evlist); }; @@ -1637,6 +1652,12 @@ static struct evlist_test test__events[] = { .check = test__checkevent_config_cache, .id = 51, }, + { + .name = "intel_pt//u", + .valid = test__intel_pt_valid, + .check = test__intel_pt, + .id = 52, + }, }; static struct evlist_test test__events_pmu[] = { @@ -1672,17 +1693,24 @@ static struct terms_test test__terms[] = { static int test_event(struct evlist_test *e) { + struct parse_events_error err = { .idx = 0, }; struct perf_evlist *evlist; int ret; + if (e->valid && !e->valid()) { + pr_debug("... SKIP"); + return 0; + } + evlist = perf_evlist__new(); if (evlist == NULL) return -ENOMEM; - ret = parse_events(evlist, e->name, NULL); + ret = parse_events(evlist, e->name, &err); if (ret) { - pr_debug("failed to parse event '%s', err %d\n", - e->name, ret); + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + e->name, ret, err.str); + parse_events_print_error(&err, e->name); } else { ret = e->check(evlist); } @@ -1700,10 +1728,11 @@ static int test_events(struct evlist_test *events, unsigned cnt) for (i = 0; i < cnt; i++) { struct evlist_test *e = &events[i]; - pr_debug("running test %d '%s'\n", e->id, e->name); + pr_debug("running test %d '%s'", e->id, e->name); ret1 = test_event(e); if (ret1) ret2 = ret1; + pr_debug("\n"); } return ret2; @@ -1785,7 +1814,7 @@ static int test_pmu_events(void) } while (!ret && (ent = readdir(dir))) { - struct evlist_test e; + struct evlist_test e = { .id = 0, }; char name[2 * NAME_MAX + 1 + 12 + 3]; /* Names containing . are special and cannot be used directly */ diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index 5d2df65ada6a..40ab72149ce1 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -7,8 +7,7 @@ #include <stdlib.h> #include <linux/compiler.h> #include "tests.h" - -extern int verbose; +#include "util/debug.h" int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused) { diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index ee86473643be..94e513e62b34 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -11,38 +11,43 @@ . $(dirname $0)/lib/probe.sh libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') -nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 +nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254 trace_libc_inet_pton_backtrace() { - idx=0 - expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" - expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$" + + expected=`mktemp -u /tmp/expected.XXX` + + echo "ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" > $expected + echo ".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' - expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$" - expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$" - expected[4]="main[[:space:]]\(.*/bin/ping.*\)$" + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; *) eventattr='max-stack=3' - expected[2]="getaddrinfo[[:space:]]\($libc\)$" - expected[3]=".*\(.*/bin/ping.*\)$" + echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; esac - file=`mktemp -u /tmp/perf.data.XXX` + perf_data=`mktemp -u /tmp/perf.data.XXX` + perf_script=`mktemp -u /tmp/perf.script.XXX` + perf record -e probe_libc:inet_pton/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 + perf script -i $perf_data > $perf_script - perf record -e probe_libc:inet_pton/$eventattr/ -o $file ping -6 -c 1 ::1 > /dev/null 2>&1 - perf script -i $file | while read line ; do + exec 3<$perf_script + exec 4<$expected + while read line <&3 && read -r pattern <&4; do + [ -z "$pattern" ] && break echo $line - echo "$line" | egrep -q "${expected[$idx]}" + echo "$line" | egrep -q "$pattern" if [ $? -ne 0 ] ; then - printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line" + printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line" exit 1 fi - let idx+=1 - [ -z "${expected[$idx]}" ] && break done # If any statements are executed from this point onwards, @@ -58,6 +63,6 @@ skip_if_no_perf_probe && \ perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace err=$? -rm -f ${file} +rm -f ${perf_data} ${perf_script} ${expected} perf probe -q -d probe_libc:inet_pton exit $err diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 55ad9793d544..4ce276efe6b4 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,7 +17,7 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 17cb1bb3448c..9497d02f69e6 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -45,6 +45,7 @@ static int session_write_header(char *path) perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); perf_header__set_feat(&session->header, HEADER_NRCPUS); + perf_header__set_feat(&session->header, HEADER_ARCH); session->header.data_size += DATA_SIZE; @@ -70,6 +71,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map) session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", session); + /* On platforms with large numbers of CPUs process_cpu_topology() + * might issue an error while reading the perf.data file section + * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member + * cpu is a NULL pointer. + * Example: On s390 + * CPU 0 is on core_id 0 and physical_package_id 6 + * CPU 1 is on core_id 1 and physical_package_id 3 + * + * Core_id and physical_package_id are platform and architecture + * dependend and might have higher numbers than the CPU id. + * This actually depends on the configuration. + * + * In this case process_cpu_topology() prints error message: + * "socket_id number is too big. You may need to upgrade the + * perf tool." + * + * This is the reason why this test might be skipped. + */ + if (!session->header.env.cpu) + return TEST_SKIP; + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { if (!cpu_map__has(map, i)) continue; @@ -95,7 +117,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe { char path[PATH_MAX]; struct cpu_map *map; - int ret = -1; + int ret = TEST_FAIL; TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); @@ -110,12 +132,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe goto free_path; } - if (check_cpu_topology(path, map)) - goto free_map; - ret = 0; - -free_map: + ret = check_cpu_topology(path, map); cpu_map__put(map); + free_path: unlink(path); return ret; diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 1e5adb65632a..7691980b7df1 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -19,8 +19,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest struct symbol *sym; struct map *kallsyms_map, *vmlinux_map, *map; struct machine kallsyms, vmlinux; - enum map_type type = MAP__FUNCTION; - struct maps *maps = &vmlinux.kmaps.maps[type]; + struct maps *maps = machine__kernel_maps(&vmlinux); u64 mem_start, mem_end; bool header_printed; @@ -56,7 +55,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) { + if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) { pr_debug("dso__load_kallsyms "); goto out; } @@ -94,7 +93,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines * to fixup the symbols. */ - if (machine__load_vmlinux_path(&vmlinux, type) <= 0) { + if (machine__load_vmlinux_path(&vmlinux) <= 0) { pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n"); err = TEST_SKIP; goto out; @@ -108,7 +107,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * in the kallsyms dso. For the ones that are in both, check its names and * end addresses too. */ - for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { + map__for_each_symbol(vmlinux_map, sym, nd) { struct symbol *pair, *first_pair; sym = rb_entry(nd, struct symbol, rb_node); @@ -119,8 +118,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start); mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end); - first_pair = machine__find_kernel_symbol(&kallsyms, type, - mem_start, NULL); + first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL); pair = first_pair; if (pair && UM(pair->start) == mem_start) { @@ -149,7 +147,7 @@ next_pair: */ continue; } else { - pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL); + pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL); if (pair) { if (UM(pair->start) == mem_start) goto next_pair; @@ -183,7 +181,7 @@ next_pair: * so use the short name, less descriptive but the same ("[kernel]" in * both cases. */ - pair = map_groups__find_by_name(&kallsyms.kmaps, type, + pair = map_groups__find_by_name(&kallsyms.kmaps, (map->dso->kernel ? map->dso->short_name : map->dso->name)); @@ -206,7 +204,7 @@ next_pair: mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start); mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end); - pair = map_groups__find(&kallsyms.kmaps, type, mem_start); + pair = map_groups__find(&kallsyms.kmaps, mem_start); if (pair == NULL || pair->priv) continue; @@ -228,7 +226,7 @@ next_pair: header_printed = false; - maps = &kallsyms.kmaps.maps[type]; + maps = machine__kernel_maps(&kallsyms); for (map = maps__first(maps); map; map = map__next(map)) { if (!map->priv) { diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 0be4138fbe71..f24722146ebe 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -1,6 +1,6 @@ #!/bin/sh -header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*' diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3781d74088a7..3b4f1c10ff57 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -29,6 +29,7 @@ struct annotate_browser { struct rb_node *curr_hot; struct annotation_line *selection; struct arch *arch; + struct annotation_options *opts; bool searching_backwards; char search_bf[128]; }; @@ -410,7 +411,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, notes = symbol__annotation(dl->ops.target.sym); pthread_mutex_lock(¬es->lock); - if (notes->src == NULL && symbol__alloc_hist(dl->ops.target.sym) < 0) { + if (!symbol__hists(dl->ops.target.sym, evsel->evlist->nr_entries)) { pthread_mutex_unlock(¬es->lock); ui__warning("Not enough memory for annotating '%s' symbol!\n", dl->ops.target.sym->name); @@ -418,7 +419,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, } pthread_mutex_unlock(¬es->lock); - symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt); + symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt, browser->opts); sym_title(ms->sym, ms->map, title, sizeof(title)); ui_browser__show_title(&browser->b, title); return true; @@ -695,6 +696,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n" "s Toggle source code view\n" "t Circulate percent, total period, samples view\n" + "c Show min/max cycle\n" "/ Search string\n" "k Toggle line numbers\n" "P Print to [symbol_name].annotation file.\n" @@ -791,6 +793,13 @@ show_sup_ins: notes->options->show_total_period = true; annotation__update_column_widths(notes); continue; + case 'c': + if (notes->options->show_minmax_cycle) + notes->options->show_minmax_cycle = false; + else + notes->options->show_minmax_cycle = true; + annotation__update_column_widths(notes); + continue; case K_LEFT: case K_ESC: case 'q': @@ -809,24 +818,27 @@ out: } int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + struct annotation_options *opts) { - return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); + return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt, opts); } int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + struct annotation_options *opts) { /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); - return map_symbol__tui_annotate(&he->ms, evsel, hbt); + return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts); } int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + struct annotation_options *opts) { struct annotation *notes = symbol__annotation(sym); struct map_symbol ms = { @@ -843,6 +855,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, .priv = &ms, .use_navkeypressed = true, }, + .opts = opts, }; int ret = -1, err; @@ -852,7 +865,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate2(sym, map, evsel, &annotation__default_options, &browser.arch); + err = symbol__annotate2(sym, map, evsel, opts, &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e5f247247daa..a96f62ca984a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1231,6 +1231,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, int width = browser->b.width; char folded_sign = ' '; bool current_entry = ui_browser__is_current_entry(&browser->b, row); + bool use_callchain = hist_entry__has_callchains(entry) && symbol_conf.use_callchain; off_t row_offset = entry->row_offset; bool first = true; struct perf_hpp_fmt *fmt; @@ -1240,7 +1241,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, browser->selection = &entry->ms; } - if (symbol_conf.use_callchain) { + if (use_callchain) { hist_entry__init_have_children(entry); folded_sign = hist_entry__folded(entry); } @@ -1276,7 +1277,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (first) { - if (symbol_conf.use_callchain) { + if (use_callchain) { ui_browser__printf(&browser->b, "%c ", folded_sign); width -= 2; } @@ -1583,7 +1584,7 @@ hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, int column = 0; int span = 0; - if (symbol_conf.use_callchain) { + if (hists__has_callchains(hists) && symbol_conf.use_callchain) { ret = scnprintf(buf, size, " "); if (advance_hpp_check(&dummy_hpp, ret)) return ret; @@ -1987,7 +1988,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, bool first = true; int ret; - if (symbol_conf.use_callchain) { + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { folded_sign = hist_entry__folded(he); printed += fprintf(fp, "%c ", folded_sign); } @@ -2175,7 +2176,8 @@ struct hist_browser *hist_browser__new(struct hists *hists) static struct hist_browser * perf_evsel_browser__new(struct perf_evsel *evsel, struct hist_browser_timer *hbt, - struct perf_env *env) + struct perf_env *env, + struct annotation_options *annotation_opts) { struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); @@ -2183,6 +2185,7 @@ perf_evsel_browser__new(struct perf_evsel *evsel, browser->hbt = hbt; browser->env = env; browser->title = hists_browser__scnprintf_title; + browser->annotation_opts = annotation_opts; } return browser; } @@ -2336,7 +2339,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!objdump_path && perf_env__lookup_objdump(browser->env)) + if (!browser->annotation_opts->objdump_path && + perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path)) return 0; notes = symbol__annotation(act->ms.sym); @@ -2344,7 +2348,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) return 0; evsel = hists_to_evsel(browser->hists); - err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, + browser->annotation_opts); he = hist_browser__selected_entry(browser); /* * offer option to annotate the other branch source or target @@ -2667,7 +2672,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, he->nr_rows = 0; } - if (!he->leaf || !symbol_conf.use_callchain) + if (!he->leaf || !hist_entry__has_callchains(he) || !symbol_conf.use_callchain) goto next; if (callchain_param.mode == CHAIN_GRAPH_REL) { @@ -2697,10 +2702,11 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event) + bool warn_lost_event, + struct annotation_options *annotation_opts) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); struct branch_info *bi; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; @@ -3062,6 +3068,7 @@ out: struct perf_evsel_menu { struct ui_browser b; struct perf_evsel *selection; + struct annotation_options *annotation_opts; bool lost_events, lost_events_warned; float min_pcnt; struct perf_env *env; @@ -3163,7 +3170,8 @@ browse_hists: true, hbt, menu->min_pcnt, menu->env, - warn_lost_event); + warn_lost_event, + menu->annotation_opts); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3222,7 +3230,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event) + bool warn_lost_event, + struct annotation_options *annotation_opts) { struct perf_evsel *pos; struct perf_evsel_menu menu = { @@ -3237,6 +3246,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, }, .min_pcnt = min_pcnt, .env = env, + .annotation_opts = annotation_opts, }; ui_helpline__push("Press ESC to exit"); @@ -3257,7 +3267,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event) + bool warn_lost_event, + struct annotation_options *annotation_opts) { int nr_entries = evlist->nr_entries; @@ -3267,7 +3278,8 @@ single_entry: return perf_evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env, warn_lost_event); + env, warn_lost_event, + annotation_opts); } if (symbol_conf.event_group) { @@ -3285,5 +3297,6 @@ single_entry: return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, - warn_lost_event); + warn_lost_event, + annotation_opts); } diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 9428bee076f2..91d3e18b50aa 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -4,6 +4,8 @@ #include "ui/browser.h" +struct annotation_options; + struct hist_browser { struct ui_browser b; struct hists *hists; @@ -12,6 +14,7 @@ struct hist_browser { struct hist_browser_timer *hbt; struct pstack *pstack; struct perf_env *env; + struct annotation_options *annotation_opts; int print_seq; bool show_dso; bool show_headers; diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index e03fa75f108a..5b8b8c637686 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -104,7 +104,7 @@ int map__browse(struct map *map) { struct map_browser mb = { .b = { - .entries = &map->dso->symbols[map->type], + .entries = &map->dso->symbols, .refresh = ui_browser__rb_tree_refresh, .seek = ui_browser__rb_tree_seek, .write = map_browser__write, diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index aeeaf15029f0..48428c9acd89 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, evsel, 0, NULL); + err = symbol__annotate(sym, map, evsel, 0, &annotation__default_options, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 24e1ec201ffd..4ab663ec3e5e 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -382,7 +382,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_tree_store_set(store, &iter, col_idx++, s, -1); } - if (symbol_conf.use_callchain && hists__has(hists, sym)) { + if (hist_entry__has_callchains(h) && + symbol_conf.use_callchain && hists__has(hists, sym)) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? h->stat_acc->period : h->stat.period; @@ -479,7 +480,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, } } - if (symbol_conf.use_callchain && he->leaf) { + if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? he->stat_acc->period : he->stat.period; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 706f6f1e9c7d..fe3dfaa64a91 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -207,7 +207,7 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, if (ret) return ret; - if (a->thread != b->thread || !symbol_conf.use_callchain) + if (a->thread != b->thread || !hist_entry__has_callchains(a) || !symbol_conf.use_callchain) return 0; ret = b->callchain->max_depth - a->callchain->max_depth; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 6832fcb2e6ff..69b7a28f7a1c 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -516,7 +516,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, } printed += putc('\n', fp); - if (symbol_conf.use_callchain && he->leaf) { + if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) { u64 total = hists__total_period(hists); printed += hist_entry_callchain__fprintf(he, total, 0, fp); @@ -550,7 +550,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, ret = fprintf(fp, "%s\n", bf); - if (use_callchain) + if (hist_entry__has_callchains(he) && use_callchain) callchain_ret = hist_entry_callchain__fprintf(he, total_period, 0, fp); @@ -819,8 +819,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, } if (h->ms.map == NULL && verbose > 1) { - __map_groups__fprintf_maps(h->thread->mg, - MAP__FUNCTION, fp); + map_groups__fprintf(h->thread->mg, fp); fprintf(fp, "%.10s end\n", graph_dotted_line); } } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8052373bcd6a..b604ef334dc9 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -24,7 +24,6 @@ libperf-y += libstring.o libperf-y += bitmap.o libperf-y += hweight.o libperf-y += smt.o -libperf-y += quote.o libperf-y += strbuf.o libperf-y += string.o libperf-y += strlist.o @@ -152,6 +151,8 @@ libperf-y += perf-hooks.o libperf-$(CONFIG_CXX) += c++/ CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" + # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5d74a30fe00f..f91775b4bc3c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -21,6 +21,7 @@ #include "debug.h" #include "annotate.h" #include "evsel.h" +#include "evlist.h" #include "block-range.h" #include "string2.h" #include "arch/common.h" @@ -46,11 +47,10 @@ struct annotation_options annotation__default_options = { .use_offset = true, .jump_arrows = true, + .annotate_src = true, .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS, }; -const char *disassembler_style; -const char *objdump_path; static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); @@ -678,10 +678,28 @@ static struct arch *arch__find(const char *name) return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); } -int symbol__alloc_hist(struct symbol *sym) +static struct annotated_source *annotated_source__new(void) +{ + struct annotated_source *src = zalloc(sizeof(*src)); + + if (src != NULL) + INIT_LIST_HEAD(&src->source); + + return src; +} + +static __maybe_unused void annotated_source__delete(struct annotated_source *src) +{ + if (src == NULL) + return; + zfree(&src->histograms); + zfree(&src->cycles_hist); + free(src); +} + +static int annotated_source__alloc_histograms(struct annotated_source *src, + size_t size, int nr_hists) { - struct annotation *notes = symbol__annotation(sym); - size_t size = symbol__size(sym); size_t sizeof_sym_hist; /* @@ -701,17 +719,13 @@ int symbol__alloc_hist(struct symbol *sym) sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry)); /* Check for overflow in zalloc argument */ - if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src)) - / symbol_conf.nr_events) + if (sizeof_sym_hist > SIZE_MAX / nr_hists) return -1; - notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist); - if (notes->src == NULL) - return -1; - notes->src->sizeof_sym_hist = sizeof_sym_hist; - notes->src->nr_histograms = symbol_conf.nr_events; - INIT_LIST_HEAD(¬es->src->source); - return 0; + src->sizeof_sym_hist = sizeof_sym_hist; + src->nr_histograms = nr_hists; + src->histograms = calloc(nr_hists, sizeof_sym_hist) ; + return src->histograms ? 0 : -1; } /* The cycles histogram is lazily allocated. */ @@ -741,14 +755,11 @@ void symbol__annotate_zero_histograms(struct symbol *sym) pthread_mutex_unlock(¬es->lock); } -static int __symbol__account_cycles(struct annotation *notes, +static int __symbol__account_cycles(struct cyc_hist *ch, u64 start, unsigned offset, unsigned cycles, unsigned have_start) { - struct cyc_hist *ch; - - ch = notes->src->cycles_hist; /* * For now we can only account one basic block per * final jump. But multiple could be overlapping. @@ -760,6 +771,15 @@ static int __symbol__account_cycles(struct annotation *notes, ch[offset].num_aggr++; ch[offset].cycles_aggr += cycles; + if (cycles > ch[offset].cycles_max) + ch[offset].cycles_max = cycles; + + if (ch[offset].cycles_min) { + if (cycles && cycles < ch[offset].cycles_min) + ch[offset].cycles_min = cycles; + } else + ch[offset].cycles_min = cycles; + if (!have_start && ch[offset].have_start) return 0; if (ch[offset].num) { @@ -782,7 +802,7 @@ static int __symbol__account_cycles(struct annotation *notes, } static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, - struct annotation *notes, int evidx, u64 addr, + struct annotated_source *src, int evidx, u64 addr, struct perf_sample *sample) { unsigned offset; @@ -798,7 +818,12 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, } offset = addr - sym->start; - h = annotation__histogram(notes, evidx); + h = annotated_source__histogram(src, evidx); + if (h == NULL) { + pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n", + __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC); + return -ENOMEM; + } h->nr_samples++; h->addr[offset].nr_samples++; h->period += sample->period; @@ -811,45 +836,69 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) +static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); if (notes->src == NULL) { - if (symbol__alloc_hist(sym) < 0) + notes->src = annotated_source__new(); + if (notes->src == NULL) return NULL; + goto alloc_cycles_hist; } - if (!notes->src->cycles_hist && cycles) { - if (symbol__alloc_hist_cycles(sym) < 0) + + if (!notes->src->cycles_hist) { +alloc_cycles_hist: + symbol__alloc_hist_cycles(sym); + } + + return notes->src->cycles_hist; +} + +struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) +{ + struct annotation *notes = symbol__annotation(sym); + + if (notes->src == NULL) { + notes->src = annotated_source__new(); + if (notes->src == NULL) return NULL; + goto alloc_histograms; + } + + if (notes->src->histograms == NULL) { +alloc_histograms: + annotated_source__alloc_histograms(notes->src, symbol__size(sym), + nr_hists); } - return notes; + + return notes->src; } static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, - int evidx, u64 addr, + struct perf_evsel *evsel, u64 addr, struct perf_sample *sample) { - struct annotation *notes; + struct annotated_source *src; if (sym == NULL) return 0; - notes = symbol__get_annotation(sym, false); - if (notes == NULL) + src = symbol__hists(sym, evsel->evlist->nr_entries); + if (src == NULL) return -ENOMEM; - return __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample); + return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample); } static int symbol__account_cycles(u64 addr, u64 start, struct symbol *sym, unsigned cycles) { - struct annotation *notes; + struct cyc_hist *cycles_hist; unsigned offset; if (sym == NULL) return 0; - notes = symbol__get_annotation(sym, true); - if (notes == NULL) + cycles_hist = symbol__cycles_hist(sym); + if (cycles_hist == NULL) return -ENOMEM; if (addr < sym->start || addr >= sym->end) return -ERANGE; @@ -861,7 +910,7 @@ static int symbol__account_cycles(u64 addr, u64 start, start = 0; } offset = addr - sym->start; - return __symbol__account_cycles(notes, + return __symbol__account_cycles(cycles_hist, start ? start - sym->start : 0, offset, cycles, !!start); @@ -953,8 +1002,11 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) if (ch->have_start) annotation__count_and_fill(notes, ch->start, offset, ch); al = notes->offsets[offset]; - if (al && ch->num_aggr) + if (al && ch->num_aggr) { al->cycles = ch->cycles_aggr / ch->num_aggr; + al->cycles_max = ch->cycles_max; + al->cycles_min = ch->cycles_min; + } notes->have_cycles = true; } } @@ -962,15 +1014,15 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, - int evidx) + struct perf_evsel *evsel) { - return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr, sample); + return symbol__inc_addr_samples(ams->sym, ams->map, evsel, ams->al_addr, sample); } int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, - int evidx, u64 ip) + struct perf_evsel *evsel, u64 ip) { - return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip, sample); + return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evsel, ip, sample); } static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) @@ -1019,6 +1071,7 @@ struct annotate_args { struct arch *arch; struct map_symbol ms; struct perf_evsel *evsel; + struct annotation_options *options; s64 offset; char *line; int line_nr; @@ -1560,6 +1613,7 @@ fallback: static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { + struct annotation_options *opts = args->options; struct map *map = args->ms.map; struct dso *dso = map->dso; char *command; @@ -1607,13 +1661,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", - objdump_path ? objdump_path : "objdump", - disassembler_style ? "-M " : "", - disassembler_style ? disassembler_style : "", + opts->objdump_path ?: "objdump", + opts->disassembler_style ? "-M " : "", + opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), - symbol_conf.annotate_asm_raw ? "" : "--no-show-raw", - symbol_conf.annotate_src ? "-S" : "", + opts->show_asm_raw ? "" : "--no-show-raw", + opts->annotate_src ? "-S" : "", symfs_filename, symfs_filename); if (err < 0) { @@ -1755,11 +1809,13 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, + struct annotation_options *options, struct arch **parch) { struct annotate_args args = { .privsize = privsize, .evsel = evsel, + .options = options, }; struct perf_env *env = perf_evsel__env(evsel); const char *arch_name = perf_env__arch(env); @@ -1937,8 +1993,8 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) } int symbol__annotate_printf(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool full_paths, - int min_pcnt, int max_lines, int context) + struct perf_evsel *evsel, + struct annotation_options *opts) { struct dso *dso = map->dso; char *filename; @@ -1950,23 +2006,28 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; + bool context = opts->context; u64 len; int width = symbol_conf.show_total_period ? 12 : 8; int graph_dotted_len; + char buf[512]; filename = strdup(dso->long_name); if (!filename) return -ENOMEM; - if (full_paths) + if (opts->full_path) d_filename = filename; else d_filename = basename(filename); len = symbol__size(sym); - if (perf_evsel__is_group_event(evsel)) + if (perf_evsel__is_group_event(evsel)) { width *= evsel->nr_members; + perf_evsel__group_desc(evsel, buf, sizeof(buf)); + evsel_name = buf; + } graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", width, width, symbol_conf.show_total_period ? "Period" : @@ -1990,7 +2051,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, } err = annotation_line__print(pos, sym, start, evsel, len, - min_pcnt, printed, max_lines, + opts->min_pcnt, printed, opts->max_lines, queue, addr_fmt_width); switch (err) { @@ -2323,20 +2384,19 @@ static void symbol__calc_lines(struct symbol *sym, struct map *map, } int symbol__tty_annotate2(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths) + struct perf_evsel *evsel, + struct annotation_options *opts) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - struct annotation_options opts = annotation__default_options; struct annotation *notes = symbol__annotation(sym); char buf[1024]; - if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0) + if (symbol__annotate2(sym, map, evsel, opts, NULL) < 0) return -1; - if (print_lines) { - srcline_full_filename = full_paths; + if (opts->print_lines) { + srcline_full_filename = opts->full_path; symbol__calc_lines(sym, map, &source_line); print_summary(&source_line, dso->long_name); } @@ -2351,25 +2411,24 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map, } int symbol__tty_annotate(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths, int min_pcnt, int max_lines) + struct perf_evsel *evsel, + struct annotation_options *opts) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, opts, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); - if (print_lines) { - srcline_full_filename = full_paths; + if (opts->print_lines) { + srcline_full_filename = opts->full_path; symbol__calc_lines(sym, map, &source_line); print_summary(&source_line, dso->long_name); } - symbol__annotate_printf(sym, map, evsel, full_paths, - min_pcnt, max_lines, 0); + symbol__annotate_printf(sym, map, evsel, opts); annotated_source__purge(symbol__annotation(sym)->src); @@ -2486,13 +2545,38 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); - if (al->cycles) - obj__printf(obj, "%*" PRIu64 " ", + if (!notes->options->show_minmax_cycle) { + if (al->cycles) + obj__printf(obj, "%*" PRIu64 " ", ANNOTATION__CYCLES_WIDTH - 1, al->cycles); - else if (!show_title) - obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); - else - obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle"); + else if (!show_title) + obj__printf(obj, "%*s", + ANNOTATION__CYCLES_WIDTH, " "); + else + obj__printf(obj, "%*s ", + ANNOTATION__CYCLES_WIDTH - 1, + "Cycle"); + } else { + if (al->cycles) { + char str[32]; + + scnprintf(str, sizeof(str), + "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", + al->cycles, al->cycles_min, + al->cycles_max); + + obj__printf(obj, "%*s ", + ANNOTATION__MINMAX_CYCLES_WIDTH - 1, + str); + } else if (!show_title) + obj__printf(obj, "%*s", + ANNOTATION__MINMAX_CYCLES_WIDTH, + " "); + else + obj__printf(obj, "%*s ", + ANNOTATION__MINMAX_CYCLES_WIDTH - 1, + "Cycle(min/max)"); + } } obj__printf(obj, " "); @@ -2579,7 +2663,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - err = symbol__annotate(sym, map, evsel, 0, parch); + err = symbol__annotate(sym, map, evsel, 0, options, parch); if (err) goto out_free_offsets; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f28a9e43421d..a4c0d91907e6 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -61,16 +61,27 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); #define ANNOTATION__IPC_WIDTH 6 #define ANNOTATION__CYCLES_WIDTH 6 +#define ANNOTATION__MINMAX_CYCLES_WIDTH 19 struct annotation_options { bool hide_src_code, use_offset, jump_arrows, + print_lines, + full_path, show_linenr, show_nr_jumps, show_nr_samples, - show_total_period; + show_total_period, + show_minmax_cycle, + show_asm_raw, + annotate_src; u8 offset_level; + int min_pcnt; + int max_lines; + int context; + const char *objdump_path; + const char *disassembler_style; }; enum { @@ -105,6 +116,8 @@ struct annotation_line { int jump_sources; float ipc; u64 cycles; + u64 cycles_max; + u64 cycles_min; size_t privsize; char *path; u32 idx; @@ -186,6 +199,8 @@ struct cyc_hist { u64 start; u64 cycles; u64 cycles_aggr; + u64 cycles_max; + u64 cycles_min; u32 num; u32 num_aggr; u8 have_start; @@ -195,7 +210,11 @@ struct cyc_hist { /** struct annotated_source - symbols with hits have this attached as in sannotation * - * @histogram: Array of addr hit histograms per event being monitored + * @histograms: Array of addr hit histograms per event being monitored + * nr_histograms: This may not be the same as evsel->evlist->nr_entries if + * we have more than a group in a evlist, where we will want + * to see each group separately, that is why symbol__annotate2() + * sets src->nr_histograms to evsel->nr_members. * @lines: If 'print_lines' is specified, per source code line percentages * @source: source parsed from a disassembler like objdump -dS * @cyc_hist: Average cycles per basic block @@ -211,7 +230,7 @@ struct annotated_source { int nr_histograms; size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; - struct sym_hist histograms[0]; + struct sym_hist *histograms; }; struct annotation { @@ -239,6 +258,9 @@ struct annotation { static inline int annotation__cycles_width(struct annotation *notes) { + if (notes->have_cycles && notes->options->show_minmax_cycle) + return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; + return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } @@ -258,10 +280,14 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) void annotation__update_column_widths(struct annotation *notes); void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); +static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx) +{ + return ((void *)src->histograms) + (src->sizeof_sym_hist * idx); +} + static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) { - return (((void *)¬es->src->histograms) + - (notes->src->sizeof_sym_hist * idx)); + return annotated_source__histogram(notes->src, idx); } static inline struct annotation *symbol__annotation(struct symbol *sym) @@ -270,20 +296,21 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, - int evidx); + struct perf_evsel *evsel); int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles); int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, - int evidx, u64 addr); + struct perf_evsel *evsel, u64 addr); -int symbol__alloc_hist(struct symbol *sym); +struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, + struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *evsel, @@ -311,8 +338,8 @@ int symbol__strerror_disassemble(struct symbol *sym, struct map *map, int errnum, char *buf, size_t buflen); int symbol__annotate_printf(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool full_paths, - int min_pcnt, int max_lines, int context); + struct perf_evsel *evsel, + struct annotation_options *options); int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); @@ -323,30 +350,27 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel) bool ui__has_annotation(void); int symbol__tty_annotate(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths, int min_pcnt, int max_lines); + struct perf_evsel *evsel, struct annotation_options *opts); int symbol__tty_annotate2(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths); + struct perf_evsel *evsel, struct annotation_options *opts); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, + struct annotation_options *opts); #else static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, struct map *map __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt - __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + struct annotation_options *opts __maybe_unused) { return 0; } #endif -extern const char *disassembler_style; - void annotation_config__init(void); #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 857de69a5361..d056447520a2 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1679,7 +1679,7 @@ struct sym_args { static bool kern_sym_match(struct sym_args *args, const char *name, char type) { /* A function with the same name, and global or the n'th found or any */ - return symbol_type__is_a(type, MAP__FUNCTION) && + return kallsyms__is_function(type) && !strcmp(name, args->name) && ((args->global && isupper(type)) || (args->selected && ++(args->cnt) == args->idx) || @@ -1784,7 +1784,7 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused, { struct sym_args *args = arg; - if (!symbol_type__is_a(type, MAP__FUNCTION)) + if (!kallsyms__is_function(type)) return 0; if (!args->started) { @@ -1915,7 +1915,7 @@ static void print_duplicate_syms(struct dso *dso, const char *sym_name) pr_err("Multiple symbols with name '%s'\n", sym_name); - sym = dso__first_symbol(dso, MAP__FUNCTION); + sym = dso__first_symbol(dso); while (sym) { if (dso_sym_match(sym, sym_name, &cnt, -1)) { pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n", @@ -1945,7 +1945,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *start = 0; *size = 0; - sym = dso__first_symbol(dso, MAP__FUNCTION); + sym = dso__first_symbol(dso); while (sym) { if (*start) { if (!*size) @@ -1972,8 +1972,8 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso) { - struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION); - struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION); + struct symbol *first_sym = dso__first_symbol(dso); + struct symbol *last_sym = dso__last_symbol(dso); if (!first_sym || !last_sym) { pr_err("Failed to determine filter for %s\nNo symbols found.\n", diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index af7ad814b2c3..cee658733e2c 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) } obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load buffer\n"); return ERR_PTR(-EINVAL); } @@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) pr_debug("bpf: successfull builtin compilation\n"); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); - if (!IS_ERR(obj) && llvm_param.dump_obj) + if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) llvm__dump_obj(filename, obj_buf, obj_buf_sz); free(obj_buf); } else obj = bpf_object__open(filename); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load %s\n", filename); return obj; } diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index 29347756b0af..77e4891e17b0 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -61,7 +61,7 @@ check_pos(struct bpf_insn_pos *pos) /* * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see - * Documentation/trace/kprobetrace.txt) to size field of BPF_LDX_MEM + * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM * instruction (BPF_{B,H,W,DW}). */ static int diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 537eadd81914..04b1d53e4bf9 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -47,9 +47,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - - if (al.map != NULL) + if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) al.map->dso->hit = 1; thread__put(thread); diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index bf31ceab33bd..89512504551b 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -146,8 +146,15 @@ getBPFObjectFromModule(llvm::Module *Module) raw_svector_ostream ostream(*Buffer); legacy::PassManager PM; - if (TargetMachine->addPassesToEmitFile(PM, ostream, - TargetMachine::CGFT_ObjectFile)) { + bool NotAdded; +#if CLANG_VERSION_MAJOR < 7 + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, + TargetMachine::CGFT_ObjectFile); +#else + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, + TargetMachine::CGFT_ObjectFile); +#endif + if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);; } diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index decb91f9da82..ccd02634a616 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -93,20 +93,17 @@ static int open_cgroup(const char *name) static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str) { struct perf_evsel *counter; - struct cgroup *cgrp = NULL; /* * check if cgrp is already defined, if so we reuse it */ evlist__for_each_entry(evlist, counter) { if (!counter->cgrp) continue; - if (!strcmp(counter->cgrp->name, str)) { - cgrp = cgroup__get(counter->cgrp); - break; - } + if (!strcmp(counter->cgrp->name, str)) + return cgroup__get(counter->cgrp); } - return cgrp; + return NULL; } static struct cgroup *cgroup__new(const char *name) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 84eb9393c7db..5ac157056cdf 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -707,6 +707,14 @@ struct perf_config_set *perf_config_set__new(void) return set; } +static int perf_config__init(void) +{ + if (config_set == NULL) + config_set = perf_config_set__new(); + + return config_set == NULL; +} + int perf_config(config_fn_t fn, void *data) { int ret = 0; @@ -714,7 +722,7 @@ int perf_config(config_fn_t fn, void *data) struct perf_config_section *section; struct perf_config_item *item; - if (config_set == NULL) + if (config_set == NULL && perf_config__init()) return -1; perf_config_set__for_each_entry(config_set, section, item) { @@ -735,12 +743,6 @@ int perf_config(config_fn_t fn, void *data) return ret; } -void perf_config__init(void) -{ - if (config_set == NULL) - config_set = perf_config_set__new(); -} - void perf_config__exit(void) { perf_config_set__delete(config_set); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index baf82bf227ac..bd0a5897c76a 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -38,7 +38,6 @@ struct perf_config_set *perf_config_set__new(void); void perf_config_set__delete(struct perf_config_set *set); int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); -void perf_config__init(void); void perf_config__exit(void); void perf_config__refresh(void); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index c8b98fa22997..4d5fc374e730 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -96,11 +96,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, /* Nothing to do, might as well just return */ if (decoder->packet_count == 0) return 0; + /* + * The queueing process in function cs_etm_decoder__buffer_packet() + * increments the tail *before* using it. This is somewhat counter + * intuitive but it has the advantage of centralizing tail management + * at a single location. Because of that we need to follow the same + * heuristic with the head, i.e we increment it before using its + * value. Otherwise the first element of the packet queue is not + * used. + */ + decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); *packet = decoder->packet_buffer[decoder->head]; - decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); - decoder->packet_count--; return 1; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index bf16dc9ee507..822ba915d144 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -270,9 +270,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, thread = etmq->etm->unknown_thread; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al); - - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso) return 0; if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b0c2b5c5d337..7123746edcf4 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -247,9 +247,9 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, *dso_db_id = dso->db_id; if (!al->sym) { - al->sym = symbol__new(al->addr, 0, 0, "unknown"); + al->sym = symbol__new(al->addr, 0, 0, 0, "unknown"); if (al->sym) - dso__insert_symbol(dso, al->map->type, al->sym); + dso__insert_symbol(dso, al->sym); } if (al->sym) { @@ -315,8 +315,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, al.addr = node->ip; if (al.map && !al.sym) - al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION, - al.addr); + al.sym = dso__find_symbol(al.map->dso, al.addr); db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 36ef45b2e89d..51cf82cf1882 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -354,6 +354,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) || (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) || (strncmp(name, "[vdso]", 6) == 0) || + (strncmp(name, "[vdso32]", 8) == 0) || + (strncmp(name, "[vdsox32]", 9) == 0) || (strncmp(name, "[vsyscall]", 10) == 0)) { m->kmod = false; @@ -1014,7 +1016,7 @@ struct map *dso__new_map(const char *name) struct dso *dso = dso__new(name); if (dso) - map = map__new2(0, dso, MAP__FUNCTION); + map = map__new2(0, dso); return map; } @@ -1176,19 +1178,19 @@ int dso__name_len(const struct dso *dso) return dso->short_name_len; } -bool dso__loaded(const struct dso *dso, enum map_type type) +bool dso__loaded(const struct dso *dso) { - return dso->loaded & (1 << type); + return dso->loaded; } -bool dso__sorted_by_name(const struct dso *dso, enum map_type type) +bool dso__sorted_by_name(const struct dso *dso) { - return dso->sorted_by_name & (1 << type); + return dso->sorted_by_name; } -void dso__set_sorted_by_name(struct dso *dso, enum map_type type) +void dso__set_sorted_by_name(struct dso *dso) { - dso->sorted_by_name |= (1 << type); + dso->sorted_by_name = true; } struct dso *dso__new(const char *name) @@ -1196,12 +1198,10 @@ struct dso *dso__new(const char *name) struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1); if (dso != NULL) { - int i; strcpy(dso->name, name); dso__set_long_name(dso, dso->name, false); dso__set_short_name(dso, dso->name, false); - for (i = 0; i < MAP__NR_TYPES; ++i) - dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; + dso->symbols = dso->symbol_names = RB_ROOT; dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT; dso->srclines = RB_ROOT; @@ -1231,8 +1231,6 @@ struct dso *dso__new(const char *name) void dso__delete(struct dso *dso) { - int i; - if (!RB_EMPTY_NODE(&dso->rb_node)) pr_err("DSO %s is still in rbtree when being deleted!\n", dso->long_name); @@ -1240,8 +1238,7 @@ void dso__delete(struct dso *dso) /* free inlines first, as they reference symbols */ inlines__tree_delete(&dso->inlined_nodes); srcline__tree_delete(&dso->srclines); - for (i = 0; i < MAP__NR_TYPES; ++i) - symbols__delete(&dso->symbols[i]); + symbols__delete(&dso->symbols); if (dso->short_name_allocated) { zfree((char **)&dso->short_name); @@ -1451,9 +1448,7 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp) size_t ret = 0; list_for_each_entry(pos, head, node) { - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) - ret += dso__fprintf(pos, i, fp); + ret += dso__fprintf(pos, fp); } return ret; @@ -1467,18 +1462,17 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) return fprintf(fp, "%s", sbuild_id); } -size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) +size_t dso__fprintf(struct dso *dso, FILE *fp) { struct rb_node *nd; size_t ret = fprintf(fp, "dso: %s (", dso->short_name); if (dso->short_name != dso->long_name) ret += fprintf(fp, "%s, ", dso->long_name); - ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type], - dso__loaded(dso, type) ? "" : "NOT "); + ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT "); ret += dso__fprintf_buildid(dso, fp); ret += fprintf(fp, ")\n"); - for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c229dbe0277a..ef69de2e69ea 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -140,14 +140,14 @@ struct dso { struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root *root; /* root of rbtree that rb_node is in */ - struct rb_root symbols[MAP__NR_TYPES]; - struct rb_root symbol_names[MAP__NR_TYPES]; + struct rb_root symbols; + struct rb_root symbol_names; struct rb_root inlined_nodes; struct rb_root srclines; struct { u64 addr; struct symbol *symbol; - } last_find_result[MAP__NR_TYPES]; + } last_find_result; void *a2l; char *symsrc_filename; unsigned int a2l_fails; @@ -164,8 +164,8 @@ struct dso { u8 short_name_allocated:1; u8 long_name_allocated:1; u8 is_64_bit:1; - u8 sorted_by_name; - u8 loaded; + bool sorted_by_name; + bool loaded; u8 rel; u8 build_id[BUILD_ID_SIZE]; u64 text_offset; @@ -202,14 +202,13 @@ struct dso { * @dso: the 'struct dso *' in which symbols itereated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage - * @type: the 'enum map_type' type of symbols */ -#define dso__for_each_symbol(dso, pos, n, type) \ - symbols__for_each_entry(&(dso)->symbols[(type)], pos, n) +#define dso__for_each_symbol(dso, pos, n) \ + symbols__for_each_entry(&(dso)->symbols, pos, n) -static inline void dso__set_loaded(struct dso *dso, enum map_type type) +static inline void dso__set_loaded(struct dso *dso) { - dso->loaded |= (1 << type); + dso->loaded = true; } struct dso *dso__new(const char *name); @@ -231,11 +230,16 @@ static inline void __dso__zput(struct dso **dso) #define dso__zput(dso) __dso__zput(&dso) -bool dso__loaded(const struct dso *dso, enum map_type type); +bool dso__loaded(const struct dso *dso); -bool dso__sorted_by_name(const struct dso *dso, enum map_type type); -void dso__set_sorted_by_name(struct dso *dso, enum map_type type); -void dso__sort_by_name(struct dso *dso, enum map_type type); +static inline bool dso__has_symbols(const struct dso *dso) +{ + return !RB_EMPTY_ROOT(&dso->symbols); +} + +bool dso__sorted_by_name(const struct dso *dso); +void dso__set_sorted_by_name(struct dso *dso); +void dso__sort_by_name(struct dso *dso); void dso__set_build_id(struct dso *dso, void *build_id); bool dso__build_id_equal(const struct dso *dso, u8 *build_id); @@ -349,9 +353,8 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, size_t __dsos__fprintf(struct list_head *head, FILE *fp); size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); -size_t dso__fprintf_symbols_by_name(struct dso *dso, - enum map_type type, FILE *fp); -size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); +size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp); +size_t dso__fprintf(struct dso *dso, FILE *fp); static inline bool dso__is_vmlinux(struct dso *dso) { diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4c842762e3f2..59f38c7693f8 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -93,6 +93,37 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +static int perf_env__read_arch(struct perf_env *env) +{ + struct utsname uts; + + if (env->arch) + return 0; + + if (!uname(&uts)) + env->arch = strdup(uts.machine); + + return env->arch ? 0 : -ENOMEM; +} + +static int perf_env__read_nr_cpus_avail(struct perf_env *env) +{ + if (env->nr_cpus_avail == 0) + env->nr_cpus_avail = cpu__max_present_cpu(); + + return env->nr_cpus_avail ? 0 : -ENOENT; +} + +const char *perf_env__raw_arch(struct perf_env *env) +{ + return env && !perf_env__read_arch(env) ? env->arch : "unknown"; +} + +int perf_env__nr_cpus_avail(struct perf_env *env) +{ + return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index c4ef2e523367..1f3ccc368530 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -76,4 +76,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__raw_arch(struct perf_env *env); +int perf_env__nr_cpus_avail(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 98ff3a6a3d50..0c8ecf0c78a4 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -88,10 +88,10 @@ static const char *perf_ns__name(unsigned int id) return perf_ns__names[id]; } -static int perf_tool__process_synth_event(struct perf_tool *tool, - union perf_event *event, - struct machine *machine, - perf_event__handler_t process) +int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process) { struct perf_sample synth_sample = { .pid = -1, @@ -464,8 +464,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, { int rc = 0; struct map *pos; - struct map_groups *kmaps = &machine->kmaps; - struct maps *maps = &kmaps->maps[MAP__FUNCTION]; + struct maps *maps = machine__kernel_maps(machine); union perf_event *event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); if (event == NULL) { @@ -488,7 +487,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, for (pos = maps__first(maps); pos; pos = map__next(pos)) { size_t size; - if (__map__is_kernel(pos)) + if (!__map__is_kmodule(pos)) continue; size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); @@ -869,7 +868,7 @@ static int find_symbol_cb(void *arg, const char *name, char type, * Must be a function or at least an alias, as in PARISC64, where "_text" is * an 'A' to the same address as "_stext". */ - if (!(symbol_type__is_a(type, MAP__FUNCTION) || + if (!(kallsyms__is_function(type) || type == 'A') || strcmp(name, args->name)) return 0; @@ -889,9 +888,16 @@ int kallsyms__get_function_start(const char *kallsyms_filename, return 0; } -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) +int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) { size_t size; struct map *map = machine__kernel_map(machine); @@ -944,6 +950,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return err; } +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int err; + + err = __perf_event__synthesize_kernel_mmap(tool, process, machine); + if (err < 0) + return err; + + return perf_event__synthesize_extra_kmaps(tool, process, machine); +} + int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, @@ -1489,9 +1508,8 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, return machine__process_event(machine, event, sample); } -void thread__find_addr_map(struct thread *thread, u8 cpumode, - enum map_type type, u64 addr, - struct addr_location *al) +struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) { struct map_groups *mg = thread->mg; struct machine *machine = mg->machine; @@ -1505,7 +1523,7 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode, if (machine == NULL) { al->map = NULL; - return; + return NULL; } if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { @@ -1533,10 +1551,10 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode, !perf_host) al->filtered |= (1 << HIST_FILTER__HOST); - return; + return NULL; } try_again: - al->map = map_groups__find(mg, type, al->addr); + al->map = map_groups__find(mg, al->addr); if (al->map == NULL) { /* * If this is outside of all known maps, and is a negative @@ -1563,17 +1581,17 @@ try_again: map__load(al->map); al->addr = al->map->map_ip(al->map, al->addr); } + + return al->map; } -void thread__find_addr_location(struct thread *thread, - u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al) +struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al) { - thread__find_addr_map(thread, cpumode, type, addr, al); - if (al->map != NULL) + al->sym = NULL; + if (thread__find_map(thread, cpumode, addr, al)) al->sym = map__find_symbol(al->map, al->addr); - else - al->sym = NULL; + return al->sym; } /* @@ -1590,7 +1608,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, return -1; dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); + thread__find_map(thread, sample->cpumode, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : "<not found>"); @@ -1669,10 +1687,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) void thread__resolve(struct thread *thread, struct addr_location *al, struct perf_sample *sample) { - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al); - if (!al->map) - thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE, - sample->addr, al); + thread__find_map(thread, sample->cpumode, sample->addr, al); al->cpu = sample->cpu; al->sym = NULL; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 0f794744919c..bfa60bcafbde 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -750,6 +750,10 @@ int perf_event__process_exit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process); int perf_event__process(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -796,6 +800,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, bool mmap_data, unsigned int proc_map_timeout); +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine); + size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a59281d64368..e7a4b31a84fb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1795,3 +1795,18 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) return true; } + +/* + * Events in data file are not collect in groups, but we still want + * the group display. Set the artificial group and set the leader's + * forced_leader flag to notify the display code. + */ +void perf_evlist__force_leader(struct perf_evlist *evlist) +{ + if (!evlist->nr_groups) { + struct perf_evsel *leader = perf_evlist__first(evlist); + + perf_evlist__set_leader(evlist); + leader->forced_leader = true; + } +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 6c41b2f78713..dc66436add98 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -309,4 +309,7 @@ struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event); bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); + +void perf_evlist__force_leader(struct perf_evlist *evlist); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4cd2cf93f726..94fce4f537e9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2197,7 +2197,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, } } - if (type & PERF_SAMPLE_CALLCHAIN) { + if (evsel__has_callchain(evsel)) { const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); OVERFLOW_CHECK_u64(array); @@ -2857,12 +2857,12 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "Hint: Try again after reducing the number of events.\n" "Hint: Try increasing the limit with 'ulimit -n <limit>'"); case ENOMEM: - if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 && + if (evsel__has_callchain(evsel) && access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0) return scnprintf(msg, size, "Not enough memory to setup event with callchain.\n" "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n" - "Hint: Current value: %d", sysctl_perf_event_max_stack); + "Hint: Current value: %d", sysctl__max_stack()); break; case ENODEV: if (target->cpu_list) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 92ec009a292d..d277930b19a1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -127,6 +127,7 @@ struct perf_evsel { bool precise_max; bool ignore_missing_thread; bool forced_leader; + bool use_uncore_alias; /* parse modifier helper */ int exclude_GH; int nr_members; @@ -458,6 +459,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evs return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } +static inline bool evsel__has_callchain(const struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; +} + typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c540d47583e7..aafbe54fd3fa 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -114,7 +114,7 @@ gen_build_id(struct buildid_note *note, fd = open("/dev/urandom", O_RDONLY); if (fd == -1) - err(1, "cannot access /dev/urandom for builid"); + err(1, "cannot access /dev/urandom for buildid"); sret = read(fd, note->build_id, sz); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a8bff2178fbc..653ff65aa2c3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1459,8 +1459,24 @@ static void print_cmdline(struct feat_fd *ff, FILE *fp) fprintf(fp, "# cmdline : "); - for (i = 0; i < nr; i++) - fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]); + for (i = 0; i < nr; i++) { + char *argv_i = strdup(ff->ph->env.cmdline_argv[i]); + if (!argv_i) { + fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]); + } else { + char *mem = argv_i; + do { + char *quote = strchr(argv_i, '\''); + if (!quote) + break; + *quote++ = '\0'; + fprintf(fp, "%s\\\'", argv_i); + argv_i = quote; + } while (1); + fprintf(fp, "%s ", argv_i); + free(mem); + } + } fputc('\n', fp); } @@ -2113,6 +2129,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) int cpu_nr = ff->ph->env.nr_cpus_avail; u64 size = 0; struct perf_header *ph = ff->ph; + bool do_core_id_test = true; ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); if (!ph->env.cpu) @@ -2167,6 +2184,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) return 0; } + /* On s390 the socket_id number is not related to the numbers of cpus. + * The socket_id number might be higher than the numbers of cpus. + * This depends on the configuration. + */ + if (ph->env.arch && !strncmp(ph->env.arch, "s390", 4)) + do_core_id_test = false; + for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; @@ -2176,7 +2200,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) goto free_cpu; - if (nr != (u32)-1 && nr > (u32)cpu_nr) { + if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) { pr_debug("socket_id number is too big." "You may need to upgrade the perf tool.\n"); goto free_cpu; @@ -3312,8 +3336,6 @@ int perf_session__read_header(struct perf_session *session) lseek(fd, tmp, SEEK_SET); } - symbol_conf.nr_events = nr_attrs; - perf_header__process_sections(header, fd, &session->tevent, perf_file_section__process); @@ -3442,7 +3464,7 @@ int perf_event__process_feature(struct perf_tool *tool, pr_warning("invalid record type %d in pipe-mode\n", type); return 0; } - if (feat == HEADER_RESERVED || feat > HEADER_LAST_FEATURE) { + if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) { pr_warning("invalid record type %d in pipe-mode\n", type); return -1; } @@ -3739,8 +3761,6 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); } - symbol_conf.nr_events = evlist->nr_entries; - return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 4d602fba40b2..828cb9794c76 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -370,9 +370,11 @@ void hists__delete_entries(struct hists *hists) static int hist_entry__init(struct hist_entry *he, struct hist_entry *template, - bool sample_self) + bool sample_self, + size_t callchain_size) { *he = *template; + he->callchain_size = callchain_size; if (symbol_conf.cumulate_callchain) { he->stat_acc = malloc(sizeof(he->stat)); @@ -410,7 +412,7 @@ static int hist_entry__init(struct hist_entry *he, map__get(he->mem_info->daddr.map); } - if (symbol_conf.use_callchain) + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) callchain_init(he->callchain); if (he->raw_data) { @@ -473,7 +475,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, he = ops->new(callchain_size); if (he) { - err = hist_entry__init(he, template, sample_self); + err = hist_entry__init(he, template, sample_self, callchain_size); if (err) { ops->free(he); he = NULL; @@ -492,7 +494,7 @@ static u8 symbol__parent_filter(const struct symbol *parent) static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) { - if (!symbol_conf.use_callchain) + if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain) return; he->hists->callchain_period += period; @@ -619,9 +621,11 @@ __hists__add_entry(struct hists *hists, .raw_data = sample->raw_data, .raw_size = sample->raw_size, .ops = ops, - }; + }, *he = hists__findnew_entry(hists, &entry, al, sample_self); - return hists__findnew_entry(hists, &entry, al, sample_self); + if (!hists->has_callchains && he && he->callchain_size != 0) + hists->has_callchains = true; + return he; } struct hist_entry *hists__add_entry(struct hists *hists, @@ -986,7 +990,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - if (symbol_conf.use_callchain) + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) callchain_append(he->callchain, &cursor, sample->period); return 0; } @@ -1039,7 +1043,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int err, err2; struct map *alm = NULL; - if (al && al->map) + if (al) alm = map__get(al->map); err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, @@ -1373,7 +1377,8 @@ static int hists__hierarchy_insert_entry(struct hists *hists, if (new_he) { new_he->leaf = true; - if (symbol_conf.use_callchain) { + if (hist_entry__has_callchains(new_he) && + symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); if (callchain_merge(&callchain_cursor, new_he->callchain, @@ -1414,7 +1419,7 @@ static int hists__collapse_insert_entry(struct hists *hists, if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); - if (symbol_conf.use_callchain) { + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); if (callchain_merge(&callchain_cursor, iter->callchain, @@ -1757,7 +1762,7 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro bool use_callchain; if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) - use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + use_callchain = evsel__has_callchain(evsel); else use_callchain = symbol_conf.use_callchain; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index fbabfd8a215d..73049f7f0f60 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -85,6 +85,7 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; + bool has_callchains; int socket_filter; struct perf_hpp_list *hpp_list; struct list_head hpp_formats; @@ -220,6 +221,11 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel) return &hevsel->hists; } +static __pure inline bool hists__has_callchains(struct hists *hists) +{ + return hists->has_callchains; +} + int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); @@ -419,19 +425,24 @@ struct hist_browser_timer { int refresh; }; +struct annotation_options; + #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, + struct annotation_options *annotation_opts); int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, + struct annotation_options *annotation_opts); int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event); + bool warn_lost_event, + struct annotation_options *annotation_options); int script_browse(const char *script_opt); #else static inline @@ -440,20 +451,23 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused) + bool warn_lost_event __maybe_unused, + struct annotation_options *annotation_options __maybe_unused) { return 0; } static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + struct annotation_options *annotation_options __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + struct annotation_options *annotation_opts __maybe_unused) { return 0; } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 72db2744876d..7f0c83b6332b 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -335,8 +335,7 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) if (!thread) return -1; - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso) goto out_put; len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h index e23578c7b1be..2669c9f748e4 100644 --- a/tools/perf/util/intel-pt-decoder/insn.h +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f9157aed1289..d404bed7003a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -113,6 +113,7 @@ struct intel_pt_decoder { bool have_cyc; bool fixup_last_mtc; bool have_last_ip; + enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -226,6 +227,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; + decoder->flags = params->flags; + decoder->period = params->period; decoder->period_type = params->period_type; @@ -1097,6 +1100,15 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) return ret; } +static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder, + struct intel_pt_insn *intel_pt_insn, + uint64_t ip, int err) +{ + return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err && + intel_pt_insn->branch == INTEL_PT_BR_INDIRECT && + ip == decoder->ip + intel_pt_insn->length; +} + static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) { struct intel_pt_insn intel_pt_insn; @@ -1109,10 +1121,11 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); if (err == INTEL_PT_RETURN) return 0; - if (err == -EAGAIN) { + if (err == -EAGAIN || + intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { if (intel_pt_fup_event(decoder)) return 0; - return err; + return -EAGAIN; } decoder->set_fup_tx_flags = false; if (err) @@ -1376,7 +1389,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); - decoder->have_tma = false; decoder->cbr = 0; decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; @@ -1604,7 +1616,6 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_PSB: case INTEL_PT_TSC: case INTEL_PT_TMA: - case INTEL_PT_CBR: case INTEL_PT_MODE_TSX: case INTEL_PT_BAD: case INTEL_PT_PSBEND: @@ -1620,6 +1631,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) decoder->pkt_step = 0; return -ENOENT; + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + case INTEL_PT_OVF: return intel_pt_overflow(decoder); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index fc1752d50019..51c18d67f4ca 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -60,6 +60,14 @@ enum { INTEL_PT_ERR_MAX, }; +enum intel_pt_param_flags { + /* + * FUP packet can contain next linear instruction pointer instead of + * current linear instruction pointer. + */ + INTEL_PT_FUP_WITH_NLIP = 1 << 0, +}; + struct intel_pt_state { enum intel_pt_sample_type type; int err; @@ -106,6 +114,7 @@ struct intel_pt_params { unsigned int mtc_period; uint32_t tsc_ctc_ratio_n; uint32_t tsc_ctc_ratio_d; + enum intel_pt_param_flags flags; }; struct intel_pt_decoder; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index ba4c9dd18643..d426761a549d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -366,7 +366,7 @@ static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf, if (len < offs) return INTEL_PT_NEED_MORE_BYTES; byte = buf[offs++]; - payload |= (byte >> 1) << shift; + payload |= ((uint64_t)byte >> 1) << shift; } packet->type = INTEL_PT_CYC; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 0effaff57020..aec68908d604 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -442,8 +442,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, } while (1) { - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso) return -EINVAL; if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && @@ -596,8 +595,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) if (!thread) return -EINVAL; - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso) return -EINVAL; offset = al.map->map_ip(al.map, ip); @@ -751,6 +749,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, unsigned int queue_nr) { struct intel_pt_params params = { .get_trace = 0, }; + struct perf_env *env = pt->machine->env; struct intel_pt_queue *ptq; ptq = zalloc(sizeof(struct intel_pt_queue)); @@ -832,6 +831,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, } } + if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18)) + params.flags |= INTEL_PT_FUP_WITH_NLIP; + ptq->decoder = intel_pt_decoder_new(¶ms); if (!ptq->decoder) goto out_free; @@ -1523,6 +1525,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (intel_pt_is_switch_ip(ptq, state->to_ip)) { switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: case INTEL_PT_SS_UNKNOWN: case INTEL_PT_SS_EXPECTING_SWITCH_IP: err = intel_pt_next_tid(pt, ptq); @@ -1565,7 +1568,7 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) if (map__load(map)) return 0; - start = dso__first_symbol(map->dso, MAP__FUNCTION); + start = dso__first_symbol(map->dso); for (sym = start; sym; sym = dso__next_symbol(sym)) { if (sym->binding == STB_GLOBAL && diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 1cca0a2fa641..5e94857dfca2 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -14,11 +14,12 @@ #include "config.h" #include "util.h" #include <sys/wait.h> +#include <subcmd/exec-cmd.h> #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $KERNEL_INC_OPTIONS " \ + "$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \ "-Wno-unused-value -Wno-pointer-sign " \ "-working-directory $WORKING_DIR " \ "-c \"$CLANG_SOURCE\" -target bpf -O2 -o -" @@ -212,7 +213,7 @@ version_notice(void) " \t\thttp://llvm.org/apt\n\n" " \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" " \toption in [llvm] section of ~/.perfconfig to:\n\n" -" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n" +" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n" " \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" " \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" " \t(Replace /path/to/llc with path to your llc)\n\n" @@ -265,16 +266,16 @@ static const char *kinc_fetch_script = "#!/usr/bin/env sh\n" "if ! test -d \"$KBUILD_DIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "TMPDIR=`mktemp -d`\n" "if test -z \"$TMPDIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "cat << EOF > $TMPDIR/Makefile\n" "obj-y := dummy.o\n" @@ -431,9 +432,11 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, const char *clang_opt = llvm_param.clang_opt; char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; char serr[STRERR_BUFSIZE]; - char *kbuild_dir = NULL, *kbuild_include_opts = NULL; + char *kbuild_dir = NULL, *kbuild_include_opts = NULL, + *perf_bpf_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; - char *command_echo, *command_out; + char *command_echo = NULL, *command_out; + char *perf_include_dir = system_path(PERF_INCLUDE_DIR); if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -471,12 +474,14 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, snprintf(linux_version_code_str, sizeof(linux_version_code_str), "0x%x", kernel_version); - + if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0) + goto errout; force_set_env("NR_CPUS", nr_cpus_avail_str); force_set_env("LINUX_VERSION_CODE", linux_version_code_str); force_set_env("CLANG_EXEC", clang_path); force_set_env("CLANG_OPTIONS", clang_opt); force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); + force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts); force_set_env("WORKING_DIR", kbuild_dir ? : "."); /* @@ -512,6 +517,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, free(command_out); free(kbuild_dir); free(kbuild_include_opts); + free(perf_bpf_include_opts); + free(perf_include_dir); if (!p_obj_buf) free(obj_buf); @@ -526,6 +533,8 @@ errout: free(kbuild_dir); free(kbuild_include_opts); free(obj_buf); + free(perf_bpf_include_opts); + free(perf_include_dir); if (p_obj_buf) *p_obj_buf = NULL; if (p_obj_buf_sz) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 32d50492505d..e7b4a8b513f2 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -24,6 +24,7 @@ #include "sane_ctype.h" #include <symbol/kallsyms.h> +#include <linux/mman.h> static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); @@ -81,8 +82,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->kptr_restrict_warned = false; machine->comm_exec = false; machine->kernel_start = 0; - - memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps)); + machine->vmlinux_map = NULL; machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) @@ -137,13 +137,11 @@ struct machine *machine__new_kallsyms(void) struct machine *machine = machine__new_host(); /* * FIXME: - * 1) MAP__FUNCTION will go away when we stop loading separate maps for - * functions and data objects. - * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely + * 1) We should switch to machine__load_kallsyms(), i.e. not explicitely * ask for not using the kcore parsing code, once this one is fixed * to create a map per module. */ - if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) { + if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) { machine__delete(machine); machine = NULL; } @@ -673,8 +671,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, if (kmod_path__parse_name(&m, filename)) return NULL; - map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, - m.name); + map = map_groups__find_by_name(&machine->kmaps, m.name); if (map) { /* * If the map's dso is an offline module, give dso__load() @@ -689,7 +686,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, if (dso == NULL) goto out; - map = map__new2(start, dso, MAP__FUNCTION); + map = map__new2(start, dso); if (map == NULL) goto out; @@ -810,8 +807,8 @@ struct process_args { u64 start; }; -static void machine__get_kallsyms_filename(struct machine *machine, char *buf, - size_t bufsz) +void machine__get_kallsyms_filename(struct machine *machine, char *buf, + size_t bufsz) { if (machine__is_default_guest(machine)) scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms); @@ -854,65 +851,171 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; } +int machine__create_extra_kernel_map(struct machine *machine, + struct dso *kernel, + struct extra_kernel_map *xm) +{ + struct kmap *kmap; + struct map *map; + + map = map__new2(xm->start, kernel); + if (!map) + return -1; + + map->end = xm->end; + map->pgoff = xm->pgoff; + + kmap = map__kmap(map); + + kmap->kmaps = &machine->kmaps; + strlcpy(kmap->name, xm->name, KMAP_NAME_LEN); + + map_groups__insert(&machine->kmaps, map); + + pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n", + kmap->name, map->start, map->end); + + map__put(map); + + return 0; +} + +static u64 find_entry_trampoline(struct dso *dso) +{ + /* Duplicates are removed so lookup all aliases */ + const char *syms[] = { + "_entry_trampoline", + "__entry_trampoline_start", + "entry_SYSCALL_64_trampoline", + }; + struct symbol *sym = dso__first_symbol(dso); + unsigned int i; + + for (; sym; sym = dso__next_symbol(sym)) { + if (sym->binding != STB_GLOBAL) + continue; + for (i = 0; i < ARRAY_SIZE(syms); i++) { + if (!strcmp(sym->name, syms[i])) + return sym->start; + } + } + + return 0; +} + +/* + * These values can be used for kernels that do not have symbols for the entry + * trampolines in kallsyms. + */ +#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL +#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 +#define X86_64_ENTRY_TRAMPOLINE 0x6000 + +/* Map x86_64 PTI entry trampolines */ +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel) +{ + struct map_groups *kmaps = &machine->kmaps; + struct maps *maps = &kmaps->maps; + int nr_cpus_avail, cpu; + bool found = false; + struct map *map; + u64 pgoff; + + /* + * In the vmlinux case, pgoff is a virtual address which must now be + * mapped to a vmlinux offset. + */ + for (map = maps__first(maps); map; map = map__next(map)) { + struct kmap *kmap = __map__kmap(map); + struct map *dest_map; + + if (!kmap || !is_entry_trampoline(kmap->name)) + continue; + + dest_map = map_groups__find(kmaps, map->pgoff); + if (dest_map != map) + map->pgoff = dest_map->map_ip(dest_map, map->pgoff); + found = true; + } + if (found || machine->trampolines_mapped) + return 0; + + pgoff = find_entry_trampoline(kernel); + if (!pgoff) + return 0; + + nr_cpus_avail = machine__nr_cpus_avail(machine); + + /* Add a 1 page map for each CPU's entry trampoline */ + for (cpu = 0; cpu < nr_cpus_avail; cpu++) { + u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU + + cpu * X86_64_CPU_ENTRY_AREA_SIZE + + X86_64_ENTRY_TRAMPOLINE; + struct extra_kernel_map xm = { + .start = va, + .end = va + page_size, + .pgoff = pgoff, + }; + + strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN); + + if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0) + return -1; + } + + machine->trampolines_mapped = nr_cpus_avail; + + return 0; +} + +int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused, + struct dso *kernel __maybe_unused) +{ + return 0; +} + static int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { - int type; + struct kmap *kmap; + struct map *map; /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); - for (type = 0; type < MAP__NR_TYPES; ++type) { - struct kmap *kmap; - struct map *map; - - machine->vmlinux_maps[type] = map__new2(0, kernel, type); - if (machine->vmlinux_maps[type] == NULL) - return -1; + machine->vmlinux_map = map__new2(0, kernel); + if (machine->vmlinux_map == NULL) + return -1; - machine->vmlinux_maps[type]->map_ip = - machine->vmlinux_maps[type]->unmap_ip = - identity__map_ip; - map = __machine__kernel_map(machine, type); - kmap = map__kmap(map); - if (!kmap) - return -1; + machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip; + map = machine__kernel_map(machine); + kmap = map__kmap(map); + if (!kmap) + return -1; - kmap->kmaps = &machine->kmaps; - map_groups__insert(&machine->kmaps, map); - } + kmap->kmaps = &machine->kmaps; + map_groups__insert(&machine->kmaps, map); return 0; } void machine__destroy_kernel_maps(struct machine *machine) { - int type; - - for (type = 0; type < MAP__NR_TYPES; ++type) { - struct kmap *kmap; - struct map *map = __machine__kernel_map(machine, type); - - if (map == NULL) - continue; + struct kmap *kmap; + struct map *map = machine__kernel_map(machine); - kmap = map__kmap(map); - map_groups__remove(&machine->kmaps, map); - if (kmap && kmap->ref_reloc_sym) { - /* - * ref_reloc_sym is shared among all maps, so free just - * on one of them. - */ - if (type == MAP__FUNCTION) { - zfree((char **)&kmap->ref_reloc_sym->name); - zfree(&kmap->ref_reloc_sym); - } else - kmap->ref_reloc_sym = NULL; - } + if (map == NULL) + return; - map__put(machine->vmlinux_maps[type]); - machine->vmlinux_maps[type] = NULL; + kmap = map__kmap(map); + map_groups__remove(&machine->kmaps, map); + if (kmap && kmap->ref_reloc_sym) { + zfree((char **)&kmap->ref_reloc_sym->name); + zfree(&kmap->ref_reloc_sym); } + + map__zput(machine->vmlinux_map); } int machines__create_guest_kernel_maps(struct machines *machines) @@ -989,32 +1092,31 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type) +int machine__load_kallsyms(struct machine *machine, const char *filename) { struct map *map = machine__kernel_map(machine); int ret = __dso__load_kallsyms(map->dso, filename, map, true); if (ret > 0) { - dso__set_loaded(map->dso, type); + dso__set_loaded(map->dso); /* * Since /proc/kallsyms will have multiple sessions for the * kernel, with modules between them, fixup the end of all * sections. */ - __map_groups__fixup_end(&machine->kmaps, type); + map_groups__fixup_end(&machine->kmaps); } return ret; } -int machine__load_vmlinux_path(struct machine *machine, enum map_type type) +int machine__load_vmlinux_path(struct machine *machine) { struct map *map = machine__kernel_map(machine); int ret = dso__load_vmlinux_path(map->dso, map); if (ret > 0) - dso__set_loaded(map->dso, type); + dso__set_loaded(map->dso); return ret; } @@ -1055,10 +1157,9 @@ static bool is_kmod_dso(struct dso *dso) static int map_groups__set_module_path(struct map_groups *mg, const char *path, struct kmod_path *m) { - struct map *map; char *long_name; + struct map *map = map_groups__find_by_name(mg, m->name); - map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name); if (map == NULL) return 0; @@ -1207,19 +1308,14 @@ static int machine__create_modules(struct machine *machine) static void machine__set_kernel_mmap(struct machine *machine, u64 start, u64 end) { - int i; - - for (i = 0; i < MAP__NR_TYPES; i++) { - machine->vmlinux_maps[i]->start = start; - machine->vmlinux_maps[i]->end = end; - - /* - * Be a bit paranoid here, some perf.data file came with - * a zero sized synthesized MMAP event for the kernel. - */ - if (start == 0 && end == 0) - machine->vmlinux_maps[i]->end = ~0ULL; - } + machine->vmlinux_map->start = start; + machine->vmlinux_map->end = end; + /* + * Be a bit paranoid here, some perf.data file came with + * a zero sized synthesized MMAP event for the kernel. + */ + if (start == 0 && end == 0) + machine->vmlinux_map->end = ~0ULL; } int machine__create_kernel_maps(struct machine *machine) @@ -1234,9 +1330,8 @@ int machine__create_kernel_maps(struct machine *machine) return -1; ret = __machine__create_kernel_maps(machine, kernel); - dso__put(kernel); if (ret < 0) - return -1; + goto out_put; if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { if (machine__is_host(machine)) @@ -1249,9 +1344,10 @@ int machine__create_kernel_maps(struct machine *machine) if (!machine__get_running_kernel_start(machine, &name, &addr)) { if (name && - maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { + map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) { machine__destroy_kernel_maps(machine); - return -1; + ret = -1; + goto out_put; } /* we have a real start address now, so re-order the kmaps */ @@ -1267,12 +1363,16 @@ int machine__create_kernel_maps(struct machine *machine) map__put(map); } + if (machine__create_extra_kernel_maps(machine, kernel)) + pr_debug("Problems creating extra kernel maps, continuing anyway...\n"); + /* update end address of the kernel map using adjacent module address */ map = map__next(machine__kernel_map(machine)); if (map) machine__set_kernel_mmap(machine, addr, map->start); - - return 0; +out_put: + dso__put(kernel); + return ret; } static bool machine__uses_kcore(struct machine *machine) @@ -1287,6 +1387,32 @@ static bool machine__uses_kcore(struct machine *machine) return false; } +static bool perf_event__is_extra_kernel_mmap(struct machine *machine, + union perf_event *event) +{ + return machine__is(machine, "x86_64") && + is_entry_trampoline(event->mmap.filename); +} + +static int machine__process_extra_kernel_map(struct machine *machine, + union perf_event *event) +{ + struct map *kernel_map = machine__kernel_map(machine); + struct dso *kernel = kernel_map ? kernel_map->dso : NULL; + struct extra_kernel_map xm = { + .start = event->mmap.start, + .end = event->mmap.start + event->mmap.len, + .pgoff = event->mmap.pgoff, + }; + + if (kernel == NULL) + return -1; + + strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); + + return machine__create_extra_kernel_map(machine, kernel, &xm); +} + static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { @@ -1379,9 +1505,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine, * time /proc/sys/kernel/kptr_restrict was non zero. */ if (event->mmap.pgoff != 0) { - maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, - symbol_name, - event->mmap.pgoff); + map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, + symbol_name, + event->mmap.pgoff); } if (machine__is_default_guest(machine)) { @@ -1390,6 +1516,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, */ dso__load(kernel, machine__kernel_map(machine)); } + } else if (perf_event__is_extra_kernel_mmap(machine, event)) { + return machine__process_extra_kernel_map(machine, event); } return 0; out_problem: @@ -1402,7 +1530,6 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; - enum map_type type; int ret = 0; if (dump_trace) @@ -1421,11 +1548,6 @@ int machine__process_mmap2_event(struct machine *machine, if (thread == NULL) goto out_problem; - if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) - type = MAP__VARIABLE; - else - type = MAP__FUNCTION; - map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, @@ -1433,7 +1555,7 @@ int machine__process_mmap2_event(struct machine *machine, event->mmap2.ino_generation, event->mmap2.prot, event->mmap2.flags, - event->mmap2.filename, type, thread); + event->mmap2.filename, thread); if (map == NULL) goto out_problem_map; @@ -1460,7 +1582,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event { struct thread *thread; struct map *map; - enum map_type type; + u32 prot = 0; int ret = 0; if (dump_trace) @@ -1479,16 +1601,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (thread == NULL) goto out_problem; - if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) - type = MAP__VARIABLE; - else - type = MAP__FUNCTION; + if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA)) + prot = PROT_EXEC; map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, prot, 0, event->mmap.filename, - type, thread); + thread); if (map == NULL) goto out_problem_map; @@ -1664,7 +1784,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); + thread__find_cpumode_addr_location(thread, ip, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -1681,15 +1801,7 @@ static void ip__resolve_data(struct thread *thread, memset(&al, 0, sizeof(al)); - thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al); - if (al.map == NULL) { - /* - * some shared data regions have execute bit set which puts - * their mapping in the MAP__FUNCTION type array. - * Check there as a fallback option before dropping the sample. - */ - thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al); - } + thread__find_symbol(thread, m, addr, &al); ams->addr = addr; ams->al_addr = al.addr; @@ -1758,8 +1870,7 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, MAP__FUNCTION, - ip, &al); + thread__find_cpumode_addr_location(thread, ip, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -1784,8 +1895,7 @@ static int add_callchain_ip(struct thread *thread, } return 0; } - thread__find_addr_location(thread, *cpumode, MAP__FUNCTION, - ip, &al); + thread__find_symbol(thread, *cpumode, ip, &al); } if (al.sym != NULL) { @@ -1810,7 +1920,7 @@ static int add_callchain_ip(struct thread *thread, } srcline = callchain_srcline(al.map, al.sym, al.addr); - return callchain_cursor_append(cursor, al.addr, al.map, al.sym, + return callchain_cursor_append(cursor, ip, al.map, al.sym, branch, flags, nr_loop_iter, iter_cycles, branch_from, srcline); } @@ -2342,6 +2452,20 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, return 0; } +/* + * Compares the raw arch string. N.B. see instead perf_env__arch() if a + * normalized arch is needed. + */ +bool machine__is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__raw_arch(machine->env), arch); +} + +int machine__nr_cpus_avail(struct machine *machine) +{ + return machine ? perf_env__nr_cpus_avail(machine->env) : 0; +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); @@ -2358,7 +2482,12 @@ int machine__get_kernel_start(struct machine *machine) machine->kernel_start = 1ULL << 63; if (map) { err = map__load(map); - if (!err) + /* + * On x86_64, PTI entry trampolines are less than the + * start of kernel text, but still above 2^63. So leave + * kernel_start = 1ULL << 63 for x86_64. + */ + if (!err && !machine__is(machine, "x86_64")) machine->kernel_start = map->start; } return err; @@ -2373,7 +2502,7 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch { struct machine *machine = vmachine; struct map *map; - struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map); + struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map); if (sym == NULL) return NULL; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 66cc200ef86f..1de7660d93e9 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -49,13 +49,14 @@ struct machine { struct perf_env *env; struct dsos dsos; struct map_groups kmaps; - struct map *vmlinux_maps[MAP__NR_TYPES]; + struct map *vmlinux_map; u64 kernel_start; pid_t *current_tid; union { /* Tool specific area */ void *priv; u64 db_id; }; + bool trampolines_mapped; }; static inline struct threads *machine__threads(struct machine *machine, pid_t tid) @@ -64,16 +65,22 @@ static inline struct threads *machine__threads(struct machine *machine, pid_t ti return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE]; } +/* + * The main kernel (vmlinux) map + */ static inline -struct map *__machine__kernel_map(struct machine *machine, enum map_type type) +struct map *machine__kernel_map(struct machine *machine) { - return machine->vmlinux_maps[type]; + return machine->vmlinux_map; } +/* + * kernel (the one returned by machine__kernel_map()) plus kernel modules maps + */ static inline -struct map *machine__kernel_map(struct machine *machine) +struct maps *machine__kernel_maps(struct machine *machine) { - return __machine__kernel_map(machine, MAP__FUNCTION); + return &machine->kmaps.maps; } int machine__get_kernel_start(struct machine *machine); @@ -182,6 +189,9 @@ static inline bool machine__is_host(struct machine *machine) return machine ? machine->pid == HOST_KERNEL_ID : false; } +bool machine__is(struct machine *machine, const char *arch); +int machine__nr_cpus_avail(struct machine *machine); + struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); @@ -190,44 +200,27 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); static inline -struct symbol *machine__find_kernel_symbol(struct machine *machine, - enum map_type type, u64 addr, +struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr, struct map **mapp) { - return map_groups__find_symbol(&machine->kmaps, type, addr, mapp); + return map_groups__find_symbol(&machine->kmaps, addr, mapp); } static inline struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, - enum map_type type, const char *name, + const char *name, struct map **mapp) { - return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp); -} - -static inline -struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr, - struct map **mapp) -{ - return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr, - mapp); -} - -static inline -struct symbol *machine__find_kernel_function_by_name(struct machine *machine, - const char *name, - struct map **mapp) -{ - return map_groups__find_function_by_name(&machine->kmaps, name, mapp); + return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp); } struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); int arch__fix_module_text_start(u64 *start, const char *name); -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type); -int machine__load_vmlinux_path(struct machine *machine, enum map_type type); +int machine__load_kallsyms(struct machine *machine, const char *filename); + +int machine__load_vmlinux_path(struct machine *machine); size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); @@ -276,4 +269,25 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, */ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); +void machine__get_kallsyms_filename(struct machine *machine, char *buf, + size_t bufsz); + +int machine__create_extra_kernel_maps(struct machine *machine, + struct dso *kernel); + +/* Kernel-space maps for symbols that are outside the main kernel map and module maps */ +struct extra_kernel_map { + u64 start; + u64 end; + u64 pgoff; + char name[KMAP_NAME_LEN]; +}; + +int machine__create_extra_kernel_map(struct machine *machine, + struct dso *kernel, + struct extra_kernel_map *xm); + +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel); + #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8fe57031e1a8..89ac5b5dc218 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -22,11 +22,6 @@ static void __maps__insert(struct maps *maps, struct map *map); -const char *map_type__name[MAP__NR_TYPES] = { - [MAP__FUNCTION] = "Functions", - [MAP__VARIABLE] = "Variables", -}; - static inline int is_anon_memory(const char *filename, u32 flags) { return flags & MAP_HUGETLB || @@ -129,10 +124,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return false; } -void map__init(struct map *map, enum map_type type, - u64 start, u64 end, u64 pgoff, struct dso *dso) +void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) { - map->type = type; map->start = start; map->end = end; map->pgoff = pgoff; @@ -149,7 +142,7 @@ void map__init(struct map *map, enum map_type type, struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, u32 d_maj, u32 d_min, u64 ino, u64 ino_gen, u32 prot, u32 flags, char *filename, - enum map_type type, struct thread *thread) + struct thread *thread) { struct map *map = malloc(sizeof(*map)); struct nsinfo *nsi = NULL; @@ -173,7 +166,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map->flags = flags; nsi = nsinfo__get(thread->nsinfo); - if ((anon || no_dso) && nsi && type == MAP__FUNCTION) { + if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) { snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", nsi->pid); filename = newfilename; @@ -203,7 +196,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (dso == NULL) goto out_delete; - map__init(map, type, start, start + len, pgoff, dso); + map__init(map, start, start + len, pgoff, dso); if (anon || no_dso) { map->map_ip = map->unmap_ip = identity__map_ip; @@ -213,8 +206,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, * functions still return NULL, and we avoid the * unnecessary map__load warning. */ - if (type != MAP__FUNCTION) - dso__set_loaded(dso, map->type); + if (!(prot & PROT_EXEC)) + dso__set_loaded(dso); } dso->nsinfo = nsi; dso__put(dso); @@ -231,7 +224,7 @@ out_delete: * they are loaded) and for vmlinux, where only after we load all the * symbols we'll know where it starts and ends. */ -struct map *map__new2(u64 start, struct dso *dso, enum map_type type) +struct map *map__new2(u64 start, struct dso *dso) { struct map *map = calloc(1, (sizeof(*map) + (dso->kernel ? sizeof(struct kmap) : 0))); @@ -239,7 +232,7 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) /* * ->end will be filled after we load all the symbols */ - map__init(map, type, start, 0, 0, dso); + map__init(map, start, 0, 0, dso); } return map; @@ -256,7 +249,19 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) */ bool __map__is_kernel(const struct map *map) { - return __machine__kernel_map(map->groups->machine, map->type) == map; + return machine__kernel_map(map->groups->machine) == map; +} + +bool __map__is_extra_kernel_map(const struct map *map) +{ + struct kmap *kmap = __map__kmap((struct map *)map); + + return kmap && kmap->name[0]; +} + +bool map__has_symbols(const struct map *map) +{ + return dso__has_symbols(map->dso); } static void map__exit(struct map *map) @@ -279,7 +284,7 @@ void map__put(struct map *map) void map__fixup_start(struct map *map) { - struct rb_root *symbols = &map->dso->symbols[map->type]; + struct rb_root *symbols = &map->dso->symbols; struct rb_node *nd = rb_first(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); @@ -289,7 +294,7 @@ void map__fixup_start(struct map *map) void map__fixup_end(struct map *map) { - struct rb_root *symbols = &map->dso->symbols[map->type]; + struct rb_root *symbols = &map->dso->symbols; struct rb_node *nd = rb_last(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); @@ -304,7 +309,7 @@ int map__load(struct map *map) const char *name = map->dso->long_name; int nr; - if (dso__loaded(map->dso, map->type)) + if (dso__loaded(map->dso)) return 0; nr = dso__load(map->dso, map); @@ -348,7 +353,7 @@ struct symbol *map__find_symbol(struct map *map, u64 addr) if (map__load(map) < 0) return NULL; - return dso__find_symbol(map->dso, map->type, addr); + return dso__find_symbol(map->dso, addr); } struct symbol *map__find_symbol_by_name(struct map *map, const char *name) @@ -356,10 +361,10 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name) if (map__load(map) < 0) return NULL; - if (!dso__sorted_by_name(map->dso, map->type)) - dso__sort_by_name(map->dso, map->type); + if (!dso__sorted_by_name(map->dso)) + dso__sort_by_name(map->dso); - return dso__find_symbol_by_name(map->dso, map->type, name); + return dso__find_symbol_by_name(map->dso, name); } struct map *map__clone(struct map *from) @@ -410,16 +415,20 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) return fprintf(fp, "%s", dsoname); } +char *map__srcline(struct map *map, u64 addr, struct symbol *sym) +{ + if (map == NULL) + return SRCLINE_UNKNOWN; + return get_srcline(map->dso, map__rip_2objdump(map, addr), sym, true, true, addr); +} + int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp) { - char *srcline; int ret = 0; if (map && map->dso) { - srcline = get_srcline(map->dso, - map__rip_2objdump(map, addr), NULL, - true, true, addr); + char *srcline = map__srcline(map, addr, NULL); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); @@ -440,6 +449,20 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, */ u64 map__rip_2objdump(struct map *map, u64 rip) { + struct kmap *kmap = __map__kmap(map); + + /* + * vmlinux does not have program headers for PTI entry trampolines and + * kcore may not either. However the trampoline object code is on the + * main kernel map, so just use that instead. + */ + if (kmap && is_entry_trampoline(kmap->name) && kmap->kmaps && kmap->kmaps->machine) { + struct map *kernel_map = machine__kernel_map(kmap->kmaps->machine); + + if (kernel_map) + map = kernel_map; + } + if (!map->dso->adjust_symbols) return rip; @@ -494,10 +517,7 @@ static void maps__init(struct maps *maps) void map_groups__init(struct map_groups *mg, struct machine *machine) { - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) { - maps__init(&mg->maps[i]); - } + maps__init(&mg->maps); mg->machine = machine; refcount_set(&mg->refcnt, 1); } @@ -525,22 +545,12 @@ static void maps__exit(struct maps *maps) void map_groups__exit(struct map_groups *mg) { - int i; - - for (i = 0; i < MAP__NR_TYPES; ++i) - maps__exit(&mg->maps[i]); + maps__exit(&mg->maps); } bool map_groups__empty(struct map_groups *mg) { - int i; - - for (i = 0; i < MAP__NR_TYPES; ++i) { - if (maps__first(&mg->maps[i])) - return false; - } - - return true; + return !maps__first(&mg->maps); } struct map_groups *map_groups__new(struct machine *machine) @@ -566,10 +576,9 @@ void map_groups__put(struct map_groups *mg) } struct symbol *map_groups__find_symbol(struct map_groups *mg, - enum map_type type, u64 addr, - struct map **mapp) + u64 addr, struct map **mapp) { - struct map *map = map_groups__find(mg, type, addr); + struct map *map = map_groups__find(mg, addr); /* Ensure map is loaded before using map->map_ip */ if (map != NULL && map__load(map) >= 0) { @@ -608,13 +617,10 @@ out: } struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - enum map_type type, const char *name, struct map **mapp) { - struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp); - - return sym; + return maps__find_symbol_by_name(&mg->maps, name, mapp); } int map_groups__find_ams(struct addr_map_symbol *ams) @@ -622,8 +628,7 @@ int map_groups__find_ams(struct addr_map_symbol *ams) if (ams->addr < ams->map->start || ams->addr >= ams->map->end) { if (ams->map->groups == NULL) return -1; - ams->map = map_groups__find(ams->map->groups, ams->map->type, - ams->addr); + ams->map = map_groups__find(ams->map->groups, ams->addr); if (ams->map == NULL) return -1; } @@ -646,7 +651,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); if (verbose > 2) { - printed += dso__fprintf(pos->dso, pos->type, fp); + printed += dso__fprintf(pos->dso, fp); printed += fprintf(fp, "--\n"); } } @@ -656,24 +661,14 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) return printed; } -size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, - FILE *fp) -{ - size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); - return printed += maps__fprintf(&mg->maps[type], fp); -} - size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) { - size_t printed = 0, i; - for (i = 0; i < MAP__NR_TYPES; ++i) - printed += __map_groups__fprintf_maps(mg, i, fp); - return printed; + return maps__fprintf(&mg->maps, fp); } static void __map_groups__insert(struct map_groups *mg, struct map *map) { - __maps__insert(&mg->maps[map->type], map); + __maps__insert(&mg->maps, map); map->groups = mg; } @@ -758,19 +753,18 @@ out: int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp) { - return maps__fixup_overlappings(&mg->maps[map->type], map, fp); + return maps__fixup_overlappings(&mg->maps, map, fp); } /* * XXX This should not really _copy_ te maps, but refcount them. */ -int map_groups__clone(struct thread *thread, - struct map_groups *parent, enum map_type type) +int map_groups__clone(struct thread *thread, struct map_groups *parent) { struct map_groups *mg = thread->mg; int err = -ENOMEM; struct map *map; - struct maps *maps = &parent->maps[type]; + struct maps *maps = &parent->maps; down_read(&maps->lock); @@ -877,15 +871,22 @@ struct map *map__next(struct map *map) return NULL; } -struct kmap *map__kmap(struct map *map) +struct kmap *__map__kmap(struct map *map) { - if (!map->dso || !map->dso->kernel) { - pr_err("Internal error: map__kmap with a non-kernel map\n"); + if (!map->dso || !map->dso->kernel) return NULL; - } return (struct kmap *)(map + 1); } +struct kmap *map__kmap(struct map *map) +{ + struct kmap *kmap = __map__kmap(map); + + if (!kmap) + pr_err("Internal error: map__kmap with a non-kernel map\n"); + return kmap; +} + struct map_groups *map__kmaps(struct map *map) { struct kmap *kmap = map__kmap(map); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0e9bbe01b0ab..4cb90f242bed 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -8,19 +8,11 @@ #include <linux/rbtree.h> #include <pthread.h> #include <stdio.h> +#include <string.h> #include <stdbool.h> #include <linux/types.h> #include "rwsem.h" -enum map_type { - MAP__FUNCTION = 0, - MAP__VARIABLE, -}; - -#define MAP__NR_TYPES (MAP__VARIABLE + 1) - -extern const char *map_type__name[MAP__NR_TYPES]; - struct dso; struct ip_callchain; struct ref_reloc_sym; @@ -35,7 +27,6 @@ struct map { }; u64 start; u64 end; - u8 /* enum map_type */ type; bool erange_warned; u32 priv; u32 prot; @@ -56,9 +47,12 @@ struct map { refcount_t refcnt; }; +#define KMAP_NAME_LEN 256 + struct kmap { struct ref_reloc_sym *ref_reloc_sym; struct map_groups *kmaps; + char name[KMAP_NAME_LEN]; }; struct maps { @@ -67,7 +61,7 @@ struct maps { }; struct map_groups { - struct maps maps[MAP__NR_TYPES]; + struct maps maps; struct machine *machine; refcount_t refcnt; }; @@ -85,6 +79,7 @@ static inline struct map_groups *map_groups__get(struct map_groups *mg) void map_groups__put(struct map_groups *mg); +struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); struct map_groups *map__kmaps(struct map *map); @@ -125,7 +120,7 @@ struct thread; * Note: caller must ensure map->dso is not NULL (map is loaded). */ #define map__for_each_symbol(map, pos, n) \ - dso__for_each_symbol(map->dso, pos, n, map->type) + dso__for_each_symbol(map->dso, pos, n) /* map__for_each_symbol_with_name - iterate over the symbols in the given map * that have the given name @@ -144,13 +139,13 @@ struct thread; #define map__for_each_symbol_by_name(map, sym_name, pos) \ __map__for_each_symbol_by_name(map, sym_name, (pos)) -void map__init(struct map *map, enum map_type type, +void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, u32 d_maj, u32 d_min, u64 ino, u64 ino_gen, u32 prot, u32 flags, - char *filename, enum map_type type, struct thread *thread); -struct map *map__new2(u64 start, struct dso *dso, enum map_type type); + char *filename, struct thread *thread); +struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); struct map *map__clone(struct map *map); @@ -174,6 +169,7 @@ static inline void __map__zput(struct map **map) int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *map, FILE *fp); size_t map__fprintf_dsoname(struct map *map, FILE *fp); +char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); @@ -185,8 +181,6 @@ void map__fixup_end(struct map *map); void map__reloc_vmlinux(struct map *map); -size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, - FILE *fp); void maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); struct map *maps__find(struct maps *maps, u64 addr); @@ -197,34 +191,29 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); int map_groups__clone(struct thread *thread, - struct map_groups *parent, enum map_type type); + struct map_groups *parent); size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); -int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, - u64 addr); +int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, + u64 addr); static inline void map_groups__insert(struct map_groups *mg, struct map *map) { - maps__insert(&mg->maps[map->type], map); + maps__insert(&mg->maps, map); map->groups = mg; } static inline void map_groups__remove(struct map_groups *mg, struct map *map) { - maps__remove(&mg->maps[map->type], map); + maps__remove(&mg->maps, map); } -static inline struct map *map_groups__find(struct map_groups *mg, - enum map_type type, u64 addr) +static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) { - return maps__find(&mg->maps[type], addr); + return maps__find(&mg->maps, addr); } -static inline struct map *map_groups__first(struct map_groups *mg, - enum map_type type) -{ - return maps__first(&mg->maps[type]); -} +struct map *map_groups__first(struct map_groups *mg); static inline struct map *map_groups__next(struct map *map) { @@ -232,11 +221,9 @@ static inline struct map *map_groups__next(struct map *map) } struct symbol *map_groups__find_symbol(struct map_groups *mg, - enum map_type type, u64 addr, - struct map **mapp); + u64 addr, struct map **mapp); struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - enum map_type type, const char *name, struct map **mapp); @@ -244,24 +231,26 @@ struct addr_map_symbol; int map_groups__find_ams(struct addr_map_symbol *ams); -static inline -struct symbol *map_groups__find_function_by_name(struct map_groups *mg, - const char *name, struct map **mapp) -{ - return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp); -} - int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp); -struct map *map_groups__find_by_name(struct map_groups *mg, - enum map_type type, const char *name); +struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); bool __map__is_kernel(const struct map *map); +bool __map__is_extra_kernel_map(const struct map *map); static inline bool __map__is_kmodule(const struct map *map) { - return !__map__is_kernel(map); + return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map); +} + +bool map__has_symbols(const struct map *map); + +#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline" + +static inline bool is_entry_trampoline(const char *name) +{ + return !strcmp(name, ENTRY_TRAMPOLINE_NAME); } #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b8b8a9558d32..15eec49e71a1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -156,13 +156,12 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { (strcmp(sys_dirent->d_name, ".")) && \ (strcmp(sys_dirent->d_name, ".."))) -static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) +static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir) { char evt_path[MAXPATHLEN]; int fd; - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, - sys_dir->d_name, evt_dir->d_name); + snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) return -EINVAL; @@ -171,12 +170,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) return 0; } -#define for_each_event(sys_dirent, evt_dir, evt_dirent) \ +#define for_each_event(dir_path, evt_dir, evt_dirent) \ while ((evt_dirent = readdir(evt_dir)) != NULL) \ if (evt_dirent->d_type == DT_DIR && \ (strcmp(evt_dirent->d_name, ".")) && \ (strcmp(evt_dirent->d_name, "..")) && \ - (!tp_event_has_id(sys_dirent, evt_dirent))) + (!tp_event_has_id(dir_path, evt_dirent))) #define MAX_EVENT_LENGTH 512 @@ -190,21 +189,21 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) int fd; u64 id; char evt_path[MAXPATHLEN]; - char dir_path[MAXPATHLEN]; + char *dir_path; - sys_dir = opendir(tracing_events_path); + sys_dir = tracing_events__opendir(); if (!sys_dir) return NULL; for_each_subsystem(sys_dir, sys_dirent) { - - snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent->d_name); + dir_path = get_events_file(sys_dirent->d_name); + if (!dir_path) + continue; evt_dir = opendir(dir_path); if (!evt_dir) - continue; + goto next; - for_each_event(sys_dirent, evt_dir, evt_dirent) { + for_each_event(dir_path, evt_dir, evt_dirent) { scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dirent->d_name); @@ -218,6 +217,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) close(fd); id = atoll(id_buf); if (id == config) { + put_events_file(dir_path); closedir(evt_dir); closedir(sys_dir); path = zalloc(sizeof(*path)); @@ -242,6 +242,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) } } closedir(evt_dir); +next: + put_events_file(dir_path); } closedir(sys_dir); @@ -512,14 +514,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, struct parse_events_error *err, struct list_head *head_config) { - char evt_path[MAXPATHLEN]; + char *evt_path; struct dirent *evt_ent; DIR *evt_dir; int ret = 0, found = 0; - snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); + evt_path = get_events_file(sys_name); + if (!evt_path) { + tracepoint_error(err, errno, sys_name, evt_name); + return -1; + } evt_dir = opendir(evt_path); if (!evt_dir) { + put_events_file(evt_path); tracepoint_error(err, errno, sys_name, evt_name); return -1; } @@ -545,6 +552,7 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, ret = -1; } + put_events_file(evt_path); closedir(evt_dir); return ret; } @@ -570,7 +578,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, DIR *events_dir; int ret = 0; - events_dir = opendir(tracing_events_path); + events_dir = tracing_events__opendir(); if (!events_dir) { tracepoint_error(err, errno, sys_name, evt_name); return -1; @@ -1219,13 +1227,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config, bool auto_merge_stats) + struct list_head *head_config, + bool auto_merge_stats, + bool use_alias) { struct perf_event_attr attr; struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; struct parse_events_error *err = parse_state->error; + bool use_uncore_alias; LIST_HEAD(config_terms); pmu = perf_pmu__find(name); @@ -1244,11 +1255,14 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, memset(&attr, 0, sizeof(attr)); } + use_uncore_alias = (pmu->is_uncore && use_alias); + if (!head_config) { attr.type = pmu->type; evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); if (evsel) { evsel->pmu_name = name; + evsel->use_uncore_alias = use_uncore_alias; return 0; } else { return -ENOMEM; @@ -1282,6 +1296,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->metric_expr = info.metric_expr; evsel->metric_name = info.metric_name; evsel->pmu_name = name; + evsel->use_uncore_alias = use_uncore_alias; } return evsel ? 0 : -ENOMEM; @@ -1317,7 +1332,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, list_add_tail(&term->list, head); if (!parse_events_add_pmu(parse_state, list, - pmu->name, head, true)) { + pmu->name, head, + true, true)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; @@ -1339,7 +1355,120 @@ int parse_events__modifier_group(struct list_head *list, return parse_events__modifier_event(list, event_mod, true); } -void parse_events__set_leader(char *name, struct list_head *list) +/* + * Check if the two uncore PMUs are from the same uncore block + * The format of the uncore PMU name is uncore_#blockname_#pmuidx + */ +static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b) +{ + char *end_a, *end_b; + + end_a = strrchr(pmu_name_a, '_'); + end_b = strrchr(pmu_name_b, '_'); + + if (!end_a || !end_b) + return false; + + if ((end_a - pmu_name_a) != (end_b - pmu_name_b)) + return false; + + return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0); +} + +static int +parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list, + struct parse_events_state *parse_state) +{ + struct perf_evsel *evsel, *leader; + uintptr_t *leaders; + bool is_leader = true; + int i, nr_pmu = 0, total_members, ret = 0; + + leader = list_first_entry(list, struct perf_evsel, node); + evsel = list_last_entry(list, struct perf_evsel, node); + total_members = evsel->idx - leader->idx + 1; + + leaders = calloc(total_members, sizeof(uintptr_t)); + if (WARN_ON(!leaders)) + return 0; + + /* + * Going through the whole group and doing sanity check. + * All members must use alias, and be from the same uncore block. + * Also, storing the leader events in an array. + */ + __evlist__for_each_entry(list, evsel) { + + /* Only split the uncore group which members use alias */ + if (!evsel->use_uncore_alias) + goto out; + + /* The events must be from the same uncore block */ + if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name)) + goto out; + + if (!is_leader) + continue; + /* + * If the event's PMU name starts to repeat, it must be a new + * event. That can be used to distinguish the leader from + * other members, even they have the same event name. + */ + if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) { + is_leader = false; + continue; + } + /* The name is always alias name */ + WARN_ON(strcmp(leader->name, evsel->name)); + + /* Store the leader event for each PMU */ + leaders[nr_pmu++] = (uintptr_t) evsel; + } + + /* only one event alias */ + if (nr_pmu == total_members) { + parse_state->nr_groups--; + goto handled; + } + + /* + * An uncore event alias is a joint name which means the same event + * runs on all PMUs of a block. + * Perf doesn't support mixed events from different PMUs in the same + * group. The big group has to be split into multiple small groups + * which only include the events from the same PMU. + * + * Here the uncore event aliases must be from the same uncore block. + * The number of PMUs must be same for each alias. The number of new + * small groups equals to the number of PMUs. + * Setting the leader event for corresponding members in each group. + */ + i = 0; + __evlist__for_each_entry(list, evsel) { + if (i >= nr_pmu) + i = 0; + evsel->leader = (struct perf_evsel *) leaders[i++]; + } + + /* The number of members and group name are same for each group */ + for (i = 0; i < nr_pmu; i++) { + evsel = (struct perf_evsel *) leaders[i]; + evsel->nr_members = total_members / nr_pmu; + evsel->group_name = name ? strdup(name) : NULL; + } + + /* Take the new small groups into account */ + parse_state->nr_groups += nr_pmu - 1; + +handled: + ret = 1; +out: + free(leaders); + return ret; +} + +void parse_events__set_leader(char *name, struct list_head *list, + struct parse_events_state *parse_state) { struct perf_evsel *leader; @@ -1348,6 +1477,9 @@ void parse_events__set_leader(char *name, struct list_head *list) return; } + if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) + return; + __perf_evlist__set_leader(list); leader = list_entry(list->next, struct perf_evsel, node); leader->group_name = name ? strdup(name) : NULL; @@ -1968,13 +2100,13 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, DIR *sys_dir, *evt_dir; struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; - char dir_path[MAXPATHLEN]; + char *dir_path; char **evt_list = NULL; unsigned int evt_i = 0, evt_num = 0; bool evt_num_known = false; restart: - sys_dir = opendir(tracing_events_path); + sys_dir = tracing_events__opendir(); if (!sys_dir) return; @@ -1989,13 +2121,14 @@ restart: !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; - snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent->d_name); + dir_path = get_events_file(sys_dirent->d_name); + if (!dir_path) + continue; evt_dir = opendir(dir_path); if (!evt_dir) - continue; + goto next; - for_each_event(sys_dirent, evt_dir, evt_dirent) { + for_each_event(dir_path, evt_dir, evt_dirent) { if (event_glob != NULL && !strglobmatch(evt_dirent->d_name, event_glob)) continue; @@ -2009,11 +2142,15 @@ restart: sys_dirent->d_name, evt_dirent->d_name); evt_list[evt_i] = strdup(evt_path); - if (evt_list[evt_i] == NULL) + if (evt_list[evt_i] == NULL) { + put_events_file(dir_path); goto out_close_evt_dir; + } evt_i++; } closedir(evt_dir); +next: + put_events_file(dir_path); } closedir(sys_dir); @@ -2061,21 +2198,21 @@ int is_valid_tracepoint(const char *event_string) DIR *sys_dir, *evt_dir; struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; - char dir_path[MAXPATHLEN]; + char *dir_path; - sys_dir = opendir(tracing_events_path); + sys_dir = tracing_events__opendir(); if (!sys_dir) return 0; for_each_subsystem(sys_dir, sys_dirent) { - - snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent->d_name); + dir_path = get_events_file(sys_dirent->d_name); + if (!dir_path) + continue; evt_dir = opendir(dir_path); if (!evt_dir) - continue; + goto next; - for_each_event(sys_dirent, evt_dir, evt_dirent) { + for_each_event(dir_path, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent->d_name, evt_dirent->d_name); if (!strcmp(evt_path, event_string)) { @@ -2085,6 +2222,8 @@ int is_valid_tracepoint(const char *event_string) } } closedir(evt_dir); +next: + put_events_file(dir_path); } closedir(sys_dir); return 0; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 5015cfd58277..4473dac27aee 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -167,7 +167,9 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config, bool auto_merge_stats); + struct list_head *head_config, + bool auto_merge_stats, + bool use_alias); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, @@ -178,7 +180,8 @@ int parse_events_copy_term_list(struct list_head *old, enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); -void parse_events__set_leader(char *name, struct list_head *list); +void parse_events__set_leader(char *name, struct list_head *list, + struct parse_events_state *parse_state); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); void parse_events_evlist_error(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index a1a01b1ac8b8..5f761f3ed0f3 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -53,7 +53,21 @@ static int str(yyscan_t scanner, int token) YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); - yylval->str = strdup(text); + if (text[0] != '\'') { + yylval->str = strdup(text); + } else { + /* + * If a text tag specified on the command line + * contains opening single quite ' then it is + * expected that the tag ends with single quote + * as well, like this: + * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' + * quotes need to be escaped to bypass shell + * processing. + */ + yylval->str = strndup(&text[1], strlen(text) - 2); + } + return token; } @@ -176,6 +190,7 @@ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* +name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ @@ -344,6 +359,7 @@ r{num_raw_hex} { return raw(yyscanner); } {bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } {bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } {name} { return pmu_str_check(yyscanner); } +{name_tag} { return str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } - { return '-'; } , { BEGIN(event); return ','; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 7afeb80cc39e..da8fe57691b8 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -73,6 +73,7 @@ static void inc_group_count(struct list_head *list, %type <num> value_sym %type <head> event_config %type <head> opt_event_config +%type <head> opt_pmu_config %type <term> event_term %type <head> event_pmu %type <head> event_legacy_symbol @@ -161,7 +162,7 @@ PE_NAME '{' events '}' struct list_head *list = $3; inc_group_count(list, _parse_state); - parse_events__set_leader($1, list); + parse_events__set_leader($1, list, _parse_state); $$ = list; } | @@ -170,7 +171,7 @@ PE_NAME '{' events '}' struct list_head *list = $2; inc_group_count(list, _parse_state); - parse_events__set_leader(NULL, list); + parse_events__set_leader(NULL, list, _parse_state); $$ = list; } @@ -224,15 +225,20 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME opt_event_config +PE_NAME opt_pmu_config { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list, *orig_terms, *terms; if (parse_events_copy_term_list($2, &orig_terms)) YYABORT; + if (error) + error->idx = @1.first_column; + ALLOC_LIST(list); - if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { + if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) { struct perf_pmu *pmu = NULL; int ok = 0; char *pattern; @@ -251,7 +257,7 @@ PE_NAME opt_event_config free(pattern); YYABORT; } - if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true)) + if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false)) ok++; parse_events_terms__delete(terms); } @@ -496,6 +502,17 @@ opt_event_config: $$ = NULL; } +opt_pmu_config: +'/' event_config '/' +{ + $$ = $2; +} +| +'/' '/' +{ + $$ = NULL; +} + start_terms: event_config { struct parse_events_state *parse_state = _parse_state; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d2fb597c9a8c..3ba6a1742f91 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -234,6 +234,74 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } +static void perf_pmu_assign_str(char *name, const char *field, char **old_str, + char **new_str) +{ + if (!*old_str) + goto set_new; + + if (*new_str) { /* Have new string, check with old */ + if (strcasecmp(*old_str, *new_str)) + pr_debug("alias %s differs in field '%s'\n", + name, field); + zfree(old_str); + } else /* Nothing new --> keep old string */ + return; +set_new: + *old_str = *new_str; + *new_str = NULL; +} + +static void perf_pmu_update_alias(struct perf_pmu_alias *old, + struct perf_pmu_alias *newalias) +{ + perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc); + perf_pmu_assign_str(old->name, "long_desc", &old->long_desc, + &newalias->long_desc); + perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic); + perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr, + &newalias->metric_expr); + perf_pmu_assign_str(old->name, "metric_name", &old->metric_name, + &newalias->metric_name); + perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str); + old->scale = newalias->scale; + old->per_pkg = newalias->per_pkg; + old->snapshot = newalias->snapshot; + memcpy(old->unit, newalias->unit, sizeof(old->unit)); +} + +/* Delete an alias entry. */ +static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +{ + zfree(&newalias->name); + zfree(&newalias->desc); + zfree(&newalias->long_desc); + zfree(&newalias->topic); + zfree(&newalias->str); + zfree(&newalias->metric_expr); + zfree(&newalias->metric_name); + parse_events_terms__purge(&newalias->terms); + free(newalias); +} + +/* Merge an alias, search in alias list. If this name is already + * present merge both of them to combine all information. + */ +static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, + struct list_head *alist) +{ + struct perf_pmu_alias *a; + + list_for_each_entry(a, alist, list) { + if (!strcasecmp(newalias->name, a->name)) { + perf_pmu_update_alias(a, newalias); + perf_pmu_free_alias(newalias); + return true; + } + } + return false; +} + static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *desc, char *val, char *long_desc, char *topic, @@ -241,9 +309,11 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *metric_expr, char *metric_name) { + struct parse_events_term *term; struct perf_pmu_alias *alias; int ret; int num; + char newval[256]; alias = malloc(sizeof(*alias)); if (!alias) @@ -262,6 +332,27 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, return ret; } + /* Scan event and remove leading zeroes, spaces, newlines, some + * platforms have terms specified as + * event=0x0091 (read from files ../<PMU>/events/<FILE> + * and terms specified as event=0x91 (read from JSON files). + * + * Rebuild string to make alias->str member comparable. + */ + memset(newval, 0, sizeof(newval)); + ret = 0; + list_for_each_entry(term, &alias->terms, list) { + if (ret) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + ","); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + "%s=%#x", term->config, term->val.num); + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + "%s=%s", term->config, term->val.str); + } + alias->name = strdup(name); if (dir) { /* @@ -285,9 +376,10 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, snprintf(alias->unit, sizeof(alias->unit), "%s", unit); } alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1; - alias->str = strdup(val); + alias->str = strdup(newval); - list_add_tail(&alias->list, list); + if (!perf_pmu_merge_alias(alias, list)) + list_add_tail(&alias->list, list); return 0; } @@ -303,6 +395,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; + /* Remove trailing newline from sysfs file */ + rtrim(buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, NULL, NULL, NULL); } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e1dbc9821617..f119eb628dbb 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -111,17 +111,6 @@ void exit_probe_symbol_maps(void) symbol__exit(); } -static struct symbol *__find_kernel_function_by_name(const char *name, - struct map **mapp) -{ - return machine__find_kernel_function_by_name(host_machine, name, mapp); -} - -static struct symbol *__find_kernel_function(u64 addr, struct map **mapp) -{ - return machine__find_kernel_function(host_machine, addr, mapp); -} - static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) { /* kmap->ref_reloc_sym should be set if host_machine is initialized */ @@ -149,7 +138,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, if (reloc_sym && strcmp(name, reloc_sym->name) == 0) *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; else { - sym = __find_kernel_function_by_name(name, &map); + sym = machine__find_kernel_symbol_by_name(host_machine, name, &map); if (!sym) return -ENOENT; *addr = map->unmap_ip(map, sym->start) - @@ -161,8 +150,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, static struct map *kernel_get_module_map(const char *module) { - struct map_groups *grp = &host_machine->kmaps; - struct maps *maps = &grp->maps[MAP__FUNCTION]; + struct maps *maps = machine__kernel_maps(host_machine); struct map *pos; /* A file path -- this is an offline module */ @@ -177,8 +165,7 @@ static struct map *kernel_get_module_map(const char *module) if (strncmp(pos->dso->short_name + 1, module, pos->dso->short_name_len - 2) == 0 && module[pos->dso->short_name_len - 2] == '\0') { - map__get(pos); - return pos; + return map__get(pos); } } return NULL; @@ -341,7 +328,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) char module_name[128]; snprintf(module_name, sizeof(module_name), "[%s]", module); - map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); + map = map_groups__find_by_name(&host_machine->kmaps, module_name); if (map) { dso = map->dso; goto found; @@ -2098,7 +2085,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, } if (addr) { addr += tp->offset; - sym = __find_kernel_function(addr, &map); + sym = machine__find_kernel_symbol(host_machine, addr, &map); } } @@ -3504,19 +3491,18 @@ int show_available_funcs(const char *target, struct nsinfo *nsi, (target) ? : "kernel"); goto end; } - if (!dso__sorted_by_name(map->dso, map->type)) - dso__sort_by_name(map->dso, map->type); + if (!dso__sorted_by_name(map->dso)) + dso__sort_by_name(map->dso); /* Show all (filtered) symbols */ setup_pager(); - for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) { struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); if (strfilter__compare(_filter, pos->sym.name)) printf("%s\n", pos->sym.name); - } - + } end: map__put(map); exit_probe_symbol_maps(); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 4ae1123c6794..b76088fadf3d 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -84,8 +84,7 @@ int open_trace_file(const char *trace_file, bool readwrite) char buf[PATH_MAX]; int ret; - ret = e_snprintf(buf, PATH_MAX, "%s/%s", - tracing_path, trace_file); + ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c deleted file mode 100644 index 22eaa201aa27..000000000000 --- a/tools/perf/util/quote.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <errno.h> -#include <stdlib.h> -#include "strbuf.h" -#include "quote.h" -#include "util.h" - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * - * E.g. - * original sq_quote result - * name ==> name ==> 'name' - * a b ==> a b ==> 'a b' - * a'b ==> a'\''b ==> 'a'\''b' - * a!b ==> a'\!'b ==> 'a'\!'b' - */ -static inline int need_bs_quote(char c) -{ - return (c == '\'' || c == '!'); -} - -static int sq_quote_buf(struct strbuf *dst, const char *src) -{ - char *to_free = NULL; - int ret; - - if (dst->buf == src) - to_free = strbuf_detach(dst, NULL); - - ret = strbuf_addch(dst, '\''); - while (!ret && *src) { - size_t len = strcspn(src, "'!"); - ret = strbuf_add(dst, src, len); - src += len; - while (!ret && need_bs_quote(*src)) - ret = strbuf_addf(dst, "'\\%c\'", *src++); - } - if (!ret) - ret = strbuf_addch(dst, '\''); - free(to_free); - - return ret; -} - -int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) -{ - int i, ret; - - /* Copy into destination buffer. */ - ret = strbuf_grow(dst, 255); - for (i = 0; !ret && argv[i]; ++i) { - ret = strbuf_addch(dst, ' '); - if (ret) - break; - ret = sq_quote_buf(dst, argv[i]); - if (maxlen && dst->len > maxlen) - return -ENOSPC; - } - return ret; -} diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h deleted file mode 100644 index 274bf26d3511..000000000000 --- a/tools/perf/util/quote.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_QUOTE_H -#define __PERF_QUOTE_H - -#include <stddef.h> - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * single quote pair. - * - * For example, if you are passing the result to system() as an - * argument: - * - * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) - * - * would be appropriate. If the system() is going to call ssh to - * run the command on the other side: - * - * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); - * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd)); - * - * Note that the above examples leak memory! Remember to free result from - * sq_quote() in a real application. - */ - -struct strbuf; - -int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); - -#endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 10dd5fce082b..bc32e57d17be 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -48,6 +48,7 @@ #include "cpumap.h" #include "print_binary.h" #include "stat.h" +#include "mem-events.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ @@ -372,6 +373,19 @@ static PyObject *get_field_numeric_entry(struct event_format *event, return obj; } +static const char *get_dsoname(struct map *map) +{ + const char *dsoname = "[unknown]"; + + if (map && map->dso) { + if (symbol_conf.show_kernel_path && map->dso->long_name) + dsoname = map->dso->long_name; + else + dsoname = map->dso->name; + } + + return dsoname; +} static PyObject *python_process_callchain(struct perf_sample *sample, struct perf_evsel *evsel, @@ -427,14 +441,8 @@ static PyObject *python_process_callchain(struct perf_sample *sample, } if (node->map) { - struct map *map = node->map; - const char *dsoname = "[unknown]"; - if (map && map->dso) { - if (symbol_conf.show_kernel_path && map->dso->long_name) - dsoname = map->dso->long_name; - else - dsoname = map->dso->name; - } + const char *dsoname = get_dsoname(node->map); + pydict_set_item_string_decref(pyelem, "dso", _PyUnicode_FromString(dsoname)); } @@ -448,6 +456,166 @@ exit: return pylist; } +static PyObject *python_process_brstack(struct perf_sample *sample, + struct thread *thread) +{ + struct branch_stack *br = sample->branch_stack; + PyObject *pylist; + u64 i; + + pylist = PyList_New(0); + if (!pylist) + Py_FatalError("couldn't create Python list"); + + if (!(br && br->nr)) + goto exit; + + for (i = 0; i < br->nr; i++) { + PyObject *pyelem; + struct addr_location al; + const char *dsoname; + + pyelem = PyDict_New(); + if (!pyelem) + Py_FatalError("couldn't create Python dictionary"); + + pydict_set_item_string_decref(pyelem, "from", + PyLong_FromUnsignedLongLong(br->entries[i].from)); + pydict_set_item_string_decref(pyelem, "to", + PyLong_FromUnsignedLongLong(br->entries[i].to)); + pydict_set_item_string_decref(pyelem, "mispred", + PyBool_FromLong(br->entries[i].flags.mispred)); + pydict_set_item_string_decref(pyelem, "predicted", + PyBool_FromLong(br->entries[i].flags.predicted)); + pydict_set_item_string_decref(pyelem, "in_tx", + PyBool_FromLong(br->entries[i].flags.in_tx)); + pydict_set_item_string_decref(pyelem, "abort", + PyBool_FromLong(br->entries[i].flags.abort)); + pydict_set_item_string_decref(pyelem, "cycles", + PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); + + thread__find_map(thread, sample->cpumode, + br->entries[i].from, &al); + dsoname = get_dsoname(al.map); + pydict_set_item_string_decref(pyelem, "from_dsoname", + _PyUnicode_FromString(dsoname)); + + thread__find_map(thread, sample->cpumode, + br->entries[i].to, &al); + dsoname = get_dsoname(al.map); + pydict_set_item_string_decref(pyelem, "to_dsoname", + _PyUnicode_FromString(dsoname)); + + PyList_Append(pylist, pyelem); + Py_DECREF(pyelem); + } + +exit: + return pylist; +} + +static unsigned long get_offset(struct symbol *sym, struct addr_location *al) +{ + unsigned long offset; + + if (al->addr < sym->end) + offset = al->addr - sym->start; + else + offset = al->addr - al->map->start - sym->start; + + return offset; +} + +static int get_symoff(struct symbol *sym, struct addr_location *al, + bool print_off, char *bf, int size) +{ + unsigned long offset; + + if (!sym || !sym->name[0]) + return scnprintf(bf, size, "%s", "[unknown]"); + + if (!print_off) + return scnprintf(bf, size, "%s", sym->name); + + offset = get_offset(sym, al); + + return scnprintf(bf, size, "%s+0x%x", sym->name, offset); +} + +static int get_br_mspred(struct branch_flags *flags, char *bf, int size) +{ + if (!flags->mispred && !flags->predicted) + return scnprintf(bf, size, "%s", "-"); + + if (flags->mispred) + return scnprintf(bf, size, "%s", "M"); + + return scnprintf(bf, size, "%s", "P"); +} + +static PyObject *python_process_brstacksym(struct perf_sample *sample, + struct thread *thread) +{ + struct branch_stack *br = sample->branch_stack; + PyObject *pylist; + u64 i; + char bf[512]; + struct addr_location al; + + pylist = PyList_New(0); + if (!pylist) + Py_FatalError("couldn't create Python list"); + + if (!(br && br->nr)) + goto exit; + + for (i = 0; i < br->nr; i++) { + PyObject *pyelem; + + pyelem = PyDict_New(); + if (!pyelem) + Py_FatalError("couldn't create Python dictionary"); + + thread__find_symbol(thread, sample->cpumode, + br->entries[i].from, &al); + get_symoff(al.sym, &al, true, bf, sizeof(bf)); + pydict_set_item_string_decref(pyelem, "from", + _PyUnicode_FromString(bf)); + + thread__find_symbol(thread, sample->cpumode, + br->entries[i].to, &al); + get_symoff(al.sym, &al, true, bf, sizeof(bf)); + pydict_set_item_string_decref(pyelem, "to", + _PyUnicode_FromString(bf)); + + get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); + pydict_set_item_string_decref(pyelem, "pred", + _PyUnicode_FromString(bf)); + + if (br->entries[i].flags.in_tx) { + pydict_set_item_string_decref(pyelem, "in_tx", + _PyUnicode_FromString("X")); + } else { + pydict_set_item_string_decref(pyelem, "in_tx", + _PyUnicode_FromString("-")); + } + + if (br->entries[i].flags.abort) { + pydict_set_item_string_decref(pyelem, "abort", + _PyUnicode_FromString("A")); + } else { + pydict_set_item_string_decref(pyelem, "abort", + _PyUnicode_FromString("-")); + } + + PyList_Append(pylist, pyelem); + Py_DECREF(pyelem); + } + +exit: + return pylist; +} + static PyObject *get_sample_value_as_tuple(struct sample_read_value *value) { PyObject *t; @@ -498,12 +666,63 @@ static void set_sample_read_in_dict(PyObject *dict_sample, pydict_set_item_string_decref(dict_sample, "values", values); } +static void set_sample_datasrc_in_dict(PyObject *dict, + struct perf_sample *sample) +{ + struct mem_info mi = { .data_src.val = sample->data_src }; + char decode[100]; + + pydict_set_item_string_decref(dict, "datasrc", + PyLong_FromUnsignedLongLong(sample->data_src)); + + perf_script__meminfo_scnprintf(decode, 100, &mi); + + pydict_set_item_string_decref(dict, "datasrc_decode", + _PyUnicode_FromString(decode)); +} + +static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) +{ + unsigned int i = 0, r; + int printed = 0; + + bf[0] = 0; + + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs->regs[i++]; + + printed += scnprintf(bf + printed, size - printed, + "%5s:0x%" PRIx64 " ", + perf_reg_name(r), val); + } + + return printed; +} + +static void set_regs_in_dict(PyObject *dict, + struct perf_sample *sample, + struct perf_evsel *evsel) +{ + struct perf_event_attr *attr = &evsel->attr; + char bf[512]; + + regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf)); + + pydict_set_item_string_decref(dict, "iregs", + _PyUnicode_FromString(bf)); + + regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf)); + + pydict_set_item_string_decref(dict, "uregs", + _PyUnicode_FromString(bf)); +} + static PyObject *get_perf_sample_dict(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, PyObject *callchain) { - PyObject *dict, *dict_sample; + PyObject *dict, *dict_sample, *brstack, *brstacksym; dict = PyDict_New(); if (!dict) @@ -531,7 +750,14 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->period)); pydict_set_item_string_decref(dict_sample, "phys_addr", PyLong_FromUnsignedLongLong(sample->phys_addr)); + pydict_set_item_string_decref(dict_sample, "addr", + PyLong_FromUnsignedLongLong(sample->addr)); set_sample_read_in_dict(dict_sample, sample, evsel); + pydict_set_item_string_decref(dict_sample, "weight", + PyLong_FromUnsignedLongLong(sample->weight)); + pydict_set_item_string_decref(dict_sample, "transaction", + PyLong_FromUnsignedLongLong(sample->transaction)); + set_sample_datasrc_in_dict(dict_sample, sample); pydict_set_item_string_decref(dict, "sample", dict_sample); pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize( @@ -549,6 +775,14 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "callchain", callchain); + brstack = python_process_brstack(sample, al->thread); + pydict_set_item_string_decref(dict, "brstack", brstack); + + brstacksym = python_process_brstacksym(sample, al->thread); + pydict_set_item_string_decref(dict, "brstacksym", brstacksym); + + set_regs_in_dict(dict, sample, evsel); + return dict; } @@ -674,14 +908,11 @@ static void python_process_tracepoint(struct perf_sample *sample, if (_PyTuple_Resize(&t, n) == -1) Py_FatalError("error resizing Python tuple"); - if (!dict) { + if (!dict) call_object(handler, t, handler_name); - } else { + else call_object(handler, t, default_handler_name); - Py_DECREF(dict); - } - Py_XDECREF(all_entries_dict); Py_DECREF(t); } @@ -1001,7 +1232,6 @@ static void python_process_general_event(struct perf_sample *sample, call_object(handler, t, handler_name); - Py_DECREF(dict); Py_DECREF(t); } @@ -1393,6 +1623,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "# See the perf-script-python Documentation for the list " "of available functions.\n\n"); + fprintf(ofp, "from __future__ import print_function\n\n"); fprintf(ofp, "import os\n"); fprintf(ofp, "import sys\n\n"); @@ -1402,10 +1633,10 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "from Core import *\n\n\n"); fprintf(ofp, "def trace_begin():\n"); - fprintf(ofp, "\tprint \"in trace_begin\"\n\n"); + fprintf(ofp, "\tprint(\"in trace_begin\")\n\n"); fprintf(ofp, "def trace_end():\n"); - fprintf(ofp, "\tprint \"in trace_end\"\n\n"); + fprintf(ofp, "\tprint(\"in trace_end\")\n\n"); while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "def %s__%s(", event->system, event->name); @@ -1441,7 +1672,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) "common_secs, common_nsecs,\n\t\t\t" "common_pid, common_comm)\n\n"); - fprintf(ofp, "\t\tprint \""); + fprintf(ofp, "\t\tprint(\""); not_first = 0; count = 0; @@ -1502,31 +1733,31 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "%s", f->name); } - fprintf(ofp, ")\n\n"); + fprintf(ofp, "))\n\n"); - fprintf(ofp, "\t\tprint 'Sample: {'+" - "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); + fprintf(ofp, "\t\tprint('Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n"); fprintf(ofp, "\t\tfor node in common_callchain:"); fprintf(ofp, "\n\t\t\tif 'sym' in node:"); - fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])"); + fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))"); fprintf(ofp, "\n\t\t\telse:"); - fprintf(ofp, "\n\t\t\t\tprint \"\t[%%x]\" %% (node['ip'])\n\n"); - fprintf(ofp, "\t\tprint \"\\n\"\n\n"); + fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n"); + fprintf(ofp, "\t\tprint()\n\n"); } fprintf(ofp, "def trace_unhandled(event_name, context, " "event_fields_dict, perf_sample_dict):\n"); - fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n"); - fprintf(ofp, "\t\tprint 'Sample: {'+" - "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); + fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n"); + fprintf(ofp, "\t\tprint('Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n"); fprintf(ofp, "def print_header(" "event_name, cpu, secs, nsecs, pid, comm):\n" - "\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" - "(event_name, cpu, secs, nsecs, pid, comm),\n\n"); + "\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" + "(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n"); fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n" "\treturn delimiter.join" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f4a7a437ee87..8b9369303561 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1094,7 +1094,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_type = evsel->attr.sample_type; - if (sample_type & PERF_SAMPLE_CALLCHAIN) + if (evsel__has_callchain(evsel)) callchain__printf(evsel, sample); if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel)) @@ -1973,12 +1973,11 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg) return false; } -int maps__set_kallsyms_ref_reloc_sym(struct map **maps, - const char *symbol_name, u64 addr) +int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr) { char *bracket; - int i; struct ref_reloc_sym *ref; + struct kmap *kmap; ref = zalloc(sizeof(struct ref_reloc_sym)); if (ref == NULL) @@ -1996,13 +1995,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps, ref->addr = addr; - for (i = 0; i < MAP__NR_TYPES; ++i) { - struct kmap *kmap = map__kmap(maps[i]); - - if (!kmap) - continue; + kmap = map__kmap(map); + if (kmap) kmap->ref_reloc_sym = ref; - } return 0; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 26a68dfd8a4f..fed2952ab45a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2,7 +2,7 @@ #include <errno.h> #include <inttypes.h> #include <regex.h> -#include <sys/mman.h> +#include <linux/mman.h> #include "sort.h" #include "hist.h" #include "comm.h" @@ -282,7 +282,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); if (sym && map) { - if (map->type == MAP__VARIABLE) { + if (sym->type == STT_OBJECT) { ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name); ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", ip - map->unmap_ip(map, sym->start)); @@ -331,24 +331,18 @@ struct sort_entry sort_sym = { /* --sort srcline */ -char *hist_entry__get_srcline(struct hist_entry *he) +char *hist_entry__srcline(struct hist_entry *he) { - struct map *map = he->ms.map; - - if (!map) - return SRCLINE_UNKNOWN; - - return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true, true, he->ip); + return map__srcline(he->ms.map, he->ip, he->ms.sym); } static int64_t sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) { if (!left->srcline) - left->srcline = hist_entry__get_srcline(left); + left->srcline = hist_entry__srcline(left); if (!right->srcline) - right->srcline = hist_entry__get_srcline(right); + right->srcline = hist_entry__srcline(right); return strcmp(right->srcline, left->srcline); } @@ -357,7 +351,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { if (!he->srcline) - he->srcline = hist_entry__get_srcline(he); + he->srcline = hist_entry__srcline(he); return repsep_snprintf(bf, size, "%-.*s", width, he->srcline); } @@ -371,33 +365,20 @@ struct sort_entry sort_srcline = { /* --sort srcline_from */ +static char *addr_map_symbol__srcline(struct addr_map_symbol *ams) +{ + return map__srcline(ams->map, ams->al_addr, ams->sym); +} + static int64_t sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->branch_info->srcline_from) { - struct map *map = left->branch_info->from.map; - if (!map) - left->branch_info->srcline_from = SRCLINE_UNKNOWN; - else - left->branch_info->srcline_from = get_srcline(map->dso, - map__rip_2objdump(map, - left->branch_info->from.al_addr), - left->branch_info->from.sym, - true, true, - left->branch_info->from.al_addr); - } - if (!right->branch_info->srcline_from) { - struct map *map = right->branch_info->from.map; - if (!map) - right->branch_info->srcline_from = SRCLINE_UNKNOWN; - else - right->branch_info->srcline_from = get_srcline(map->dso, - map__rip_2objdump(map, - right->branch_info->from.al_addr), - right->branch_info->from.sym, - true, true, - right->branch_info->from.al_addr); - } + if (!left->branch_info->srcline_from) + left->branch_info->srcline_from = addr_map_symbol__srcline(&left->branch_info->from); + + if (!right->branch_info->srcline_from) + right->branch_info->srcline_from = addr_map_symbol__srcline(&right->branch_info->from); + return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -419,30 +400,12 @@ struct sort_entry sort_srcline_from = { static int64_t sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->branch_info->srcline_to) { - struct map *map = left->branch_info->to.map; - if (!map) - left->branch_info->srcline_to = SRCLINE_UNKNOWN; - else - left->branch_info->srcline_to = get_srcline(map->dso, - map__rip_2objdump(map, - left->branch_info->to.al_addr), - left->branch_info->from.sym, - true, true, - left->branch_info->to.al_addr); - } - if (!right->branch_info->srcline_to) { - struct map *map = right->branch_info->to.map; - if (!map) - right->branch_info->srcline_to = SRCLINE_UNKNOWN; - else - right->branch_info->srcline_to = get_srcline(map->dso, - map__rip_2objdump(map, - right->branch_info->to.al_addr), - right->branch_info->to.sym, - true, true, - right->branch_info->to.al_addr); - } + if (!left->branch_info->srcline_to) + left->branch_info->srcline_to = addr_map_symbol__srcline(&left->branch_info->to); + + if (!right->branch_info->srcline_to) + right->branch_info->srcline_to = addr_map_symbol__srcline(&right->branch_info->to); + return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -1211,7 +1174,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, /* print [s] for shared data mmaps */ if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && - map && (map->type == MAP__VARIABLE) && + map && !(map->prot & PROT_EXEC) && (map->flags & MAP_SHARED) && (map->maj || map->min || map->ino || map->ino_generation)) @@ -2582,7 +2545,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; - if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0) + if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0) return -EINVAL; if (sd->entry == &sort_mem_daddr_sym) @@ -2628,7 +2591,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, if (*tok) { ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { - if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok))) + if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok))) pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); else pr_err("Invalid --sort key: `%s'", tok); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 035b62e2c60b..8bf302cafcec 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -112,6 +112,8 @@ struct hist_entry { char level; u8 filtered; + + u16 callchain_size; union { /* * Since perf diff only supports the stdio output, TUI @@ -151,6 +153,11 @@ struct hist_entry { struct callchain_root callchain[0]; /* must be last member */ }; +static __pure inline bool hist_entry__has_callchains(struct hist_entry *he) +{ + return he->callchain_size != 0; +} + static inline bool hist_entry__has_pairs(struct hist_entry *he) { return !list_empty(&he->pairs.node); @@ -186,13 +193,13 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he) static inline u64 cl_address(u64 address) { /* return the cacheline of the address */ - return (address & ~(cacheline_size - 1)); + return (address & ~(cacheline_size() - 1)); } static inline u64 cl_offset(u64 address) { /* return the cacheline of the address */ - return (address & (cacheline_size - 1)); + return (address & (cacheline_size() - 1)); } enum sort_mode { @@ -292,5 +299,5 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); -char *hist_entry__get_srcline(struct hist_entry *he); +char *hist_entry__srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 3c21fd059b64..09d6746e6ec8 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -103,6 +103,7 @@ static struct symbol *new_inline_sym(struct dso *dso, inline_sym = symbol__new(base_sym ? base_sym->start : 0, base_sym ? base_sym->end : 0, base_sym ? base_sym->binding : 0, + base_sym ? base_sym->type : 0, funcname); if (inline_sym) inline_sym->inlined = 1; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 8f56ba4fd258..36efb986f7fc 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -7,8 +7,7 @@ #include "xyarray.h" #include "rblist.h" -struct stats -{ +struct stats { double n, mean, M2; u64 max, min; }; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 2de770511e70..29770ea61768 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -114,16 +114,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) sym->st_shndx != SHN_ABS; } -static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) +static bool elf_sym__filter(GElf_Sym *sym) { - switch (type) { - case MAP__FUNCTION: - return elf_sym__is_function(sym); - case MAP__VARIABLE: - return elf_sym__is_object(sym); - default: - return false; - } + return elf_sym__is_function(sym) || elf_sym__is_object(sym); } static inline const char *elf_sym__name(const GElf_Sym *sym, @@ -150,17 +143,10 @@ static inline bool elf_sec__is_data(const GElf_Shdr *shdr, return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; } -static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, - enum map_type type) +static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs) { - switch (type) { - case MAP__FUNCTION: - return elf_sec__is_text(shdr, secstrs); - case MAP__VARIABLE: - return elf_sec__is_data(shdr, secstrs); - default: - return false; - } + return elf_sec__is_text(shdr, secstrs) || + elf_sec__is_data(shdr, secstrs); } static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) @@ -256,7 +242,7 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map) +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -364,12 +350,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * free(demangled); f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, sympltname); + STB_GLOBAL, STT_FUNC, sympltname); if (!f) goto out_elf_end; plt_offset += plt_entry_size; - symbols__insert(&dso->symbols[map->type], f); + symbols__insert(&dso->symbols, f); ++nr; } } else if (shdr_rel_plt.sh_type == SHT_REL) { @@ -390,12 +376,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * free(demangled); f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, sympltname); + STB_GLOBAL, STT_FUNC, sympltname); if (!f) goto out_elf_end; plt_offset += plt_entry_size; - symbols__insert(&dso->symbols[map->type], f); + symbols__insert(&dso->symbols, f); ++nr; } } @@ -811,6 +797,110 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +static int dso__process_kernel_symbol(struct dso *dso, struct map *map, + GElf_Sym *sym, GElf_Shdr *shdr, + struct map_groups *kmaps, struct kmap *kmap, + struct dso **curr_dsop, struct map **curr_mapp, + const char *section_name, + bool adjust_kernel_syms, bool kmodule, bool *remap_kernel) +{ + struct dso *curr_dso = *curr_dsop; + struct map *curr_map; + char dso_name[PATH_MAX]; + + /* Adjust symbol to map to file offset */ + if (adjust_kernel_syms) + sym->st_value -= shdr->sh_addr - shdr->sh_offset; + + if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0) + return 0; + + if (strcmp(section_name, ".text") == 0) { + /* + * The initial kernel mapping is based on + * kallsyms and identity maps. Overwrite it to + * map to the kernel dso. + */ + if (*remap_kernel && dso->kernel) { + *remap_kernel = false; + map->start = shdr->sh_addr + ref_reloc(kmap); + map->end = map->start + shdr->sh_size; + map->pgoff = shdr->sh_offset; + map->map_ip = map__map_ip; + map->unmap_ip = map__unmap_ip; + /* Ensure maps are correctly ordered */ + if (kmaps) { + map__get(map); + map_groups__remove(kmaps, map); + map_groups__insert(kmaps, map); + map__put(map); + } + } + + /* + * The initial module mapping is based on + * /proc/modules mapped to offset zero. + * Overwrite it to map to the module dso. + */ + if (*remap_kernel && kmodule) { + *remap_kernel = false; + map->pgoff = shdr->sh_offset; + } + + *curr_mapp = map; + *curr_dsop = dso; + return 0; + } + + if (!kmap) + return 0; + + snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); + + curr_map = map_groups__find_by_name(kmaps, dso_name); + if (curr_map == NULL) { + u64 start = sym->st_value; + + if (kmodule) + start += map->start + shdr->sh_offset; + + curr_dso = dso__new(dso_name); + if (curr_dso == NULL) + return -1; + curr_dso->kernel = dso->kernel; + curr_dso->long_name = dso->long_name; + curr_dso->long_name_len = dso->long_name_len; + curr_map = map__new2(start, curr_dso); + dso__put(curr_dso); + if (curr_map == NULL) + return -1; + + if (adjust_kernel_syms) { + curr_map->start = shdr->sh_addr + ref_reloc(kmap); + curr_map->end = curr_map->start + shdr->sh_size; + curr_map->pgoff = shdr->sh_offset; + } else { + curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; + } + curr_dso->symtab_type = dso->symtab_type; + map_groups__insert(kmaps, curr_map); + /* + * Add it before we drop the referece to curr_map, i.e. while + * we still are sure to have a reference to this DSO via + * *curr_map->dso. + */ + dsos__add(&map->groups->machine->dsos, curr_dso); + /* kmaps already got it */ + map__put(curr_map); + dso__set_loaded(curr_dso); + *curr_mapp = curr_map; + *curr_dsop = curr_dso; + } else + *curr_dsop = curr_map->dso; + + return 0; +} + int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule) { @@ -844,7 +934,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, * have the wrong values for the dso maps, so remove them. */ if (kmodule && syms_ss->symtab) - symbols__delete(&dso->symbols[map->type]); + symbols__delete(&dso->symbols); if (!syms_ss->symtab) { /* @@ -921,10 +1011,10 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* - * Initial kernel and module mappings do not map to the dso. For - * function mappings, flag the fixups. + * Initial kernel and module mappings do not map to the dso. + * Flag the fixups. */ - if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) { + if (dso->kernel || kmodule) { remap_kernel = true; adjust_kernel_syms = dso->adjust_symbols; } @@ -936,7 +1026,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, const char *section_name; bool used_opd = false; - if (!is_label && !elf_sym__is_a(&sym, map->type)) + if (!is_label && !elf_sym__filter(&sym)) continue; /* Reject ARM ELF "mapping symbols": these aren't unique and @@ -974,7 +1064,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, gelf_getshdr(sec, &shdr); - if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) + if (is_label && !elf_sec__filter(&shdr, secstrs)) continue; section_name = elf_sec__name(&shdr, secstrs); @@ -982,134 +1072,37 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, /* On ARM, symbols for thumb functions have 1 added to * the symbol address as a flag - remove it */ if ((ehdr.e_machine == EM_ARM) && - (map->type == MAP__FUNCTION) && + (GELF_ST_TYPE(sym.st_info) == STT_FUNC) && (sym.st_value & 1)) --sym.st_value; if (dso->kernel || kmodule) { - char dso_name[PATH_MAX]; - - /* Adjust symbol to map to file offset */ - if (adjust_kernel_syms) - sym.st_value -= shdr.sh_addr - shdr.sh_offset; - - if (strcmp(section_name, - (curr_dso->short_name + - dso->short_name_len)) == 0) - goto new_symbol; - - if (strcmp(section_name, ".text") == 0) { - /* - * The initial kernel mapping is based on - * kallsyms and identity maps. Overwrite it to - * map to the kernel dso. - */ - if (remap_kernel && dso->kernel) { - remap_kernel = false; - map->start = shdr.sh_addr + - ref_reloc(kmap); - map->end = map->start + shdr.sh_size; - map->pgoff = shdr.sh_offset; - map->map_ip = map__map_ip; - map->unmap_ip = map__unmap_ip; - /* Ensure maps are correctly ordered */ - if (kmaps) { - map__get(map); - map_groups__remove(kmaps, map); - map_groups__insert(kmaps, map); - map__put(map); - } - } - - /* - * The initial module mapping is based on - * /proc/modules mapped to offset zero. - * Overwrite it to map to the module dso. - */ - if (remap_kernel && kmodule) { - remap_kernel = false; - map->pgoff = shdr.sh_offset; - } - - curr_map = map; - curr_dso = dso; - goto new_symbol; - } - - if (!kmap) - goto new_symbol; - - snprintf(dso_name, sizeof(dso_name), - "%s%s", dso->short_name, section_name); - - curr_map = map_groups__find_by_name(kmaps, map->type, dso_name); - if (curr_map == NULL) { - u64 start = sym.st_value; - - if (kmodule) - start += map->start + shdr.sh_offset; - - curr_dso = dso__new(dso_name); - if (curr_dso == NULL) - goto out_elf_end; - curr_dso->kernel = dso->kernel; - curr_dso->long_name = dso->long_name; - curr_dso->long_name_len = dso->long_name_len; - curr_map = map__new2(start, curr_dso, - map->type); - dso__put(curr_dso); - if (curr_map == NULL) { - goto out_elf_end; - } - if (adjust_kernel_syms) { - curr_map->start = shdr.sh_addr + - ref_reloc(kmap); - curr_map->end = curr_map->start + - shdr.sh_size; - curr_map->pgoff = shdr.sh_offset; - } else { - curr_map->map_ip = identity__map_ip; - curr_map->unmap_ip = identity__map_ip; - } - curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmaps, curr_map); - /* - * Add it before we drop the referece to curr_map, - * i.e. while we still are sure to have a reference - * to this DSO via curr_map->dso. - */ - dsos__add(&map->groups->machine->dsos, curr_dso); - /* kmaps already got it */ - map__put(curr_map); - dso__set_loaded(curr_dso, map->type); - } else - curr_dso = curr_map->dso; - - goto new_symbol; - } - - if ((used_opd && runtime_ss->adjust_symbols) - || (!used_opd && syms_ss->adjust_symbols)) { + if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map, + section_name, adjust_kernel_syms, kmodule, &remap_kernel)) + goto out_elf_end; + } else if ((used_opd && runtime_ss->adjust_symbols) || + (!used_opd && syms_ss->adjust_symbols)) { pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; } -new_symbol: + demangled = demangle_sym(dso, kmodule, elf_name); if (demangled != NULL) elf_name = demangled; f = symbol__new(sym.st_value, sym.st_size, - GELF_ST_BIND(sym.st_info), elf_name); + GELF_ST_BIND(sym.st_info), + GELF_ST_TYPE(sym.st_info), elf_name); free(demangled); if (!f) goto out_elf_end; arch__sym_update(f, &sym); - __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel); + __symbols__insert(&curr_dso->symbols, f, dso->kernel); nr++; } @@ -1117,14 +1110,14 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_end(&dso->symbols[map->type]); - symbols__fixup_duplicate(&dso->symbols[map->type]); + symbols__fixup_end(&dso->symbols); + symbols__fixup_duplicate(&dso->symbols); if (kmap) { /* * We need to fixup this here too because we create new * maps here, for things like vsyscall sections. */ - __map_groups__fixup_end(kmaps, map->type); + map_groups__fixup_end(kmaps); } } err = nr; @@ -1393,8 +1386,16 @@ static off_t kcore__write(struct kcore *kcore) struct phdr_data { off_t offset; + off_t rel; u64 addr; u64 len; + struct list_head node; + struct phdr_data *remaps; +}; + +struct sym_data { + u64 addr; + struct list_head node; }; struct kcore_copy_info { @@ -1404,16 +1405,78 @@ struct kcore_copy_info { u64 last_symbol; u64 first_module; u64 last_module_symbol; - struct phdr_data kernel_map; - struct phdr_data modules_map; + size_t phnum; + struct list_head phdrs; + struct list_head syms; }; +#define kcore_copy__for_each_phdr(k, p) \ + list_for_each_entry((p), &(k)->phdrs, node) + +static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset) +{ + struct phdr_data *p = zalloc(sizeof(*p)); + + if (p) { + p->addr = addr; + p->len = len; + p->offset = offset; + } + + return p; +} + +static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci, + u64 addr, u64 len, + off_t offset) +{ + struct phdr_data *p = phdr_data__new(addr, len, offset); + + if (p) + list_add_tail(&p->node, &kci->phdrs); + + return p; +} + +static void kcore_copy__free_phdrs(struct kcore_copy_info *kci) +{ + struct phdr_data *p, *tmp; + + list_for_each_entry_safe(p, tmp, &kci->phdrs, node) { + list_del(&p->node); + free(p); + } +} + +static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci, + u64 addr) +{ + struct sym_data *s = zalloc(sizeof(*s)); + + if (s) { + s->addr = addr; + list_add_tail(&s->node, &kci->syms); + } + + return s; +} + +static void kcore_copy__free_syms(struct kcore_copy_info *kci) +{ + struct sym_data *s, *tmp; + + list_for_each_entry_safe(s, tmp, &kci->syms, node) { + list_del(&s->node); + free(s); + } +} + static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, u64 start) { struct kcore_copy_info *kci = arg; - if (!symbol_type__is_a(type, MAP__FUNCTION)) + if (!kallsyms__is_function(type)) return 0; if (strchr(name, '[')) { @@ -1438,6 +1501,9 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, return 0; } + if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start)) + return -1; + return 0; } @@ -1487,27 +1553,39 @@ static int kcore_copy__parse_modules(struct kcore_copy_info *kci, return 0; } -static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff, - u64 s, u64 e) +static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end, + u64 pgoff, u64 s, u64 e) { - if (p->addr || s < start || s >= end) - return; + u64 len, offset; + + if (s < start || s >= end) + return 0; - p->addr = s; - p->offset = (s - start) + pgoff; - p->len = e < end ? e - s : end - s; + offset = (s - start) + pgoff; + len = e < end ? e - s : end - s; + + return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1; } static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data) { struct kcore_copy_info *kci = data; u64 end = start + len; + struct sym_data *sdat; - kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext, - kci->etext); + if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext)) + return -1; - kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module, - kci->last_module_symbol); + if (kcore_copy__map(kci, start, end, pgoff, kci->first_module, + kci->last_module_symbol)) + return -1; + + list_for_each_entry(sdat, &kci->syms, node) { + u64 s = round_down(sdat->addr, page_size); + + if (kcore_copy__map(kci, start, end, pgoff, s, s + len)) + return -1; + } return 0; } @@ -1520,6 +1598,64 @@ static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf) return 0; } +static void kcore_copy__find_remaps(struct kcore_copy_info *kci) +{ + struct phdr_data *p, *k = NULL; + u64 kend; + + if (!kci->stext) + return; + + /* Find phdr that corresponds to the kernel map (contains stext) */ + kcore_copy__for_each_phdr(kci, p) { + u64 pend = p->addr + p->len - 1; + + if (p->addr <= kci->stext && pend >= kci->stext) { + k = p; + break; + } + } + + if (!k) + return; + + kend = k->offset + k->len; + + /* Find phdrs that remap the kernel */ + kcore_copy__for_each_phdr(kci, p) { + u64 pend = p->offset + p->len; + + if (p == k) + continue; + + if (p->offset >= k->offset && pend <= kend) + p->remaps = k; + } +} + +static void kcore_copy__layout(struct kcore_copy_info *kci) +{ + struct phdr_data *p; + off_t rel = 0; + + kcore_copy__find_remaps(kci); + + kcore_copy__for_each_phdr(kci, p) { + if (!p->remaps) { + p->rel = rel; + rel += p->len; + } + kci->phnum += 1; + } + + kcore_copy__for_each_phdr(kci, p) { + struct phdr_data *k = p->remaps; + + if (k) + p->rel = p->offset - k->offset + k->rel; + } +} + static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, Elf *elf) { @@ -1555,7 +1691,12 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, if (kci->first_module && !kci->last_module_symbol) return -1; - return kcore_copy__read_maps(kci, elf); + if (kcore_copy__read_maps(kci, elf)) + return -1; + + kcore_copy__layout(kci); + + return 0; } static int kcore_copy__copy_file(const char *from_dir, const char *to_dir, @@ -1678,12 +1819,15 @@ int kcore_copy(const char *from_dir, const char *to_dir) { struct kcore kcore; struct kcore extract; - size_t count = 2; int idx = 0, err = -1; - off_t offset = page_size, sz, modules_offset = 0; + off_t offset, sz; struct kcore_copy_info kci = { .stext = 0, }; char kcore_filename[PATH_MAX]; char extract_filename[PATH_MAX]; + struct phdr_data *p; + + INIT_LIST_HEAD(&kci.phdrs); + INIT_LIST_HEAD(&kci.syms); if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms")) return -1; @@ -1703,20 +1847,17 @@ int kcore_copy(const char *from_dir, const char *to_dir) if (kcore__init(&extract, extract_filename, kcore.elfclass, false)) goto out_kcore_close; - if (!kci.modules_map.addr) - count -= 1; - - if (kcore__copy_hdr(&kcore, &extract, count)) + if (kcore__copy_hdr(&kcore, &extract, kci.phnum)) goto out_extract_close; - if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr, - kci.kernel_map.len)) - goto out_extract_close; + offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) + + gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT); + offset = round_up(offset, page_size); + + kcore_copy__for_each_phdr(&kci, p) { + off_t offs = p->rel + offset; - if (kci.modules_map.addr) { - modules_offset = offset + kci.kernel_map.len; - if (kcore__add_phdr(&extract, idx, modules_offset, - kci.modules_map.addr, kci.modules_map.len)) + if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len)) goto out_extract_close; } @@ -1724,14 +1865,14 @@ int kcore_copy(const char *from_dir, const char *to_dir) if (sz < 0 || sz > offset) goto out_extract_close; - if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset, - kci.kernel_map.len)) - goto out_extract_close; + kcore_copy__for_each_phdr(&kci, p) { + off_t offs = p->rel + offset; - if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset, - extract.fd, modules_offset, - kci.modules_map.len)) - goto out_extract_close; + if (p->remaps) + continue; + if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len)) + goto out_extract_close; + } if (kcore_copy__compare_file(from_dir, to_dir, "modules")) goto out_extract_close; @@ -1754,6 +1895,9 @@ out_unlink_kallsyms: if (err) kcore_copy__unlink(to_dir, "kallsyms"); + kcore_copy__free_phdrs(&kci); + kcore_copy__free_syms(&kci); + return err; } diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index ff48d0d49584..7119df77dc0b 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -288,8 +288,7 @@ void symsrc__destroy(struct symsrc *ss) } int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused, - struct symsrc *ss __maybe_unused, - struct map *map __maybe_unused) + struct symsrc *ss __maybe_unused) { return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1466814ebada..d188b7588152 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <string.h> #include <linux/kernel.h> +#include <linux/mman.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/param.h> @@ -39,7 +40,6 @@ char **vmlinux_path; struct symbol_conf symbol_conf = { .use_modules = true, .try_vmlinux_path = true, - .annotate_src = true, .demangle = true, .demangle_kernel = false, .cumulate_callchain = true, @@ -70,18 +70,10 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -bool symbol_type__is_a(char symbol_type, enum map_type map_type) +static bool symbol_type__filter(char symbol_type) { symbol_type = toupper(symbol_type); - - switch (map_type) { - case MAP__FUNCTION: - return symbol_type == 'T' || symbol_type == 'W'; - case MAP__VARIABLE: - return symbol_type == 'D'; - default: - return false; - } + return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B'; } static int prefix_underscores_count(const char *str) @@ -228,9 +220,9 @@ void symbols__fixup_end(struct rb_root *symbols) curr->end = roundup(curr->start, 4096) + 4096; } -void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) +void map_groups__fixup_end(struct map_groups *mg) { - struct maps *maps = &mg->maps[type]; + struct maps *maps = &mg->maps; struct map *next, *curr; down_write(&maps->lock); @@ -256,7 +248,7 @@ out_unlock: up_write(&maps->lock); } -struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) +struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) { size_t namelen = strlen(name) + 1; struct symbol *sym = calloc(1, (symbol_conf.priv_size + @@ -274,6 +266,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) sym->start = start; sym->end = len ? start + len : start; + sym->type = type; sym->binding = binding; sym->namelen = namelen - 1; @@ -484,45 +477,40 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, void dso__reset_find_symbol_cache(struct dso *dso) { - enum map_type type; - - for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { - dso->last_find_result[type].addr = 0; - dso->last_find_result[type].symbol = NULL; - } + dso->last_find_result.addr = 0; + dso->last_find_result.symbol = NULL; } -void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym) +void dso__insert_symbol(struct dso *dso, struct symbol *sym) { - __symbols__insert(&dso->symbols[type], sym, dso->kernel); + __symbols__insert(&dso->symbols, sym, dso->kernel); /* update the symbol cache if necessary */ - if (dso->last_find_result[type].addr >= sym->start && - (dso->last_find_result[type].addr < sym->end || + if (dso->last_find_result.addr >= sym->start && + (dso->last_find_result.addr < sym->end || sym->start == sym->end)) { - dso->last_find_result[type].symbol = sym; + dso->last_find_result.symbol = sym; } } -struct symbol *dso__find_symbol(struct dso *dso, - enum map_type type, u64 addr) +struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { - if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) { - dso->last_find_result[type].addr = addr; - dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); + if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { + dso->last_find_result.addr = addr; + dso->last_find_result.symbol = symbols__find(&dso->symbols, addr); } - return dso->last_find_result[type].symbol; + return dso->last_find_result.symbol; } -struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) +struct symbol *dso__first_symbol(struct dso *dso) { - return symbols__first(&dso->symbols[type]); + return symbols__first(&dso->symbols); } -struct symbol *dso__last_symbol(struct dso *dso, enum map_type type) +struct symbol *dso__last_symbol(struct dso *dso) { - return symbols__last(&dso->symbols[type]); + return symbols__last(&dso->symbols); } struct symbol *dso__next_symbol(struct symbol *sym) @@ -539,24 +527,22 @@ struct symbol *symbol__next_by_name(struct symbol *sym) } /* - * Teturns first symbol that matched with @name. + * Returns first symbol that matched with @name. */ -struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, - const char *name) +struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name) { - struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name, + struct symbol *s = symbols__find_by_name(&dso->symbol_names, name, SYMBOL_TAG_INCLUDE__NONE); if (!s) - s = symbols__find_by_name(&dso->symbol_names[type], name, + s = symbols__find_by_name(&dso->symbol_names, name, SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); return s; } -void dso__sort_by_name(struct dso *dso, enum map_type type) +void dso__sort_by_name(struct dso *dso) { - dso__set_sorted_by_name(dso, type); - return symbols__sort_by_name(&dso->symbol_names[type], - &dso->symbols[type]); + dso__set_sorted_by_name(dso); + return symbols__sort_by_name(&dso->symbol_names, &dso->symbols); } int modules__parse(const char *filename, void *arg, @@ -621,11 +607,6 @@ out: return err; } -struct process_kallsyms_args { - struct map *map; - struct dso *dso; -}; - /* * These are symbols in the kernel image, so make sure that * sym is from a kernel DSO. @@ -661,10 +642,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name, char type, u64 start) { struct symbol *sym; - struct process_kallsyms_args *a = arg; - struct rb_root *root = &a->dso->symbols[a->map->type]; + struct dso *dso = arg; + struct rb_root *root = &dso->symbols; - if (!symbol_type__is_a(type, a->map->type)) + if (!symbol_type__filter(type)) return 0; /* @@ -672,7 +653,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * symbols, setting length to 0, and rely on * symbols__fixup_end() to fix it up. */ - sym = symbol__new(start, 0, kallsyms2elf_binding(type), name); + sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name); if (sym == NULL) return -ENOMEM; /* @@ -689,21 +670,18 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * so that we can in the next step set the symbol ->end address and then * call kernel_maps__split_kallsyms. */ -static int dso__load_all_kallsyms(struct dso *dso, const char *filename, - struct map *map) +static int dso__load_all_kallsyms(struct dso *dso, const char *filename) { - struct process_kallsyms_args args = { .map = map, .dso = dso, }; - return kallsyms__parse(filename, &args, map__process_kallsym_symbol); + return kallsyms__parse(filename, dso, map__process_kallsym_symbol); } -static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) +static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso) { - struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; struct symbol *pos; int count = 0; - struct rb_root old_root = dso->symbols[map->type]; - struct rb_root *root = &dso->symbols[map->type]; + struct rb_root old_root = dso->symbols; + struct rb_root *root = &dso->symbols; struct rb_node *next = rb_first(root); if (!kmaps) @@ -723,7 +701,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) if (module) *module = '\0'; - curr_map = map_groups__find(kmaps, map->type, pos->start); + curr_map = map_groups__find(kmaps, pos->start); if (!curr_map) { symbol__delete(pos); @@ -733,7 +711,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) pos->start -= curr_map->start - curr_map->pgoff; if (pos->end) pos->end -= curr_map->start - curr_map->pgoff; - symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + symbols__insert(&curr_map->dso->symbols, pos); ++count; } @@ -748,22 +726,25 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) +static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta, + struct map *initial_map) { - struct map_groups *kmaps = map__kmaps(map); struct machine *machine; - struct map *curr_map = map; + struct map *curr_map = initial_map; struct symbol *pos; int count = 0, moved = 0; - struct rb_root *root = &dso->symbols[map->type]; + struct rb_root *root = &dso->symbols; struct rb_node *next = rb_first(root); int kernel_range = 0; + bool x86_64; if (!kmaps) return -1; machine = kmaps->machine; + x86_64 = machine__is(machine, "x86_64"); + while (next) { char *module; @@ -778,7 +759,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) *module++ = '\0'; if (strcmp(curr_map->dso->short_name, module)) { - if (curr_map != map && + if (curr_map != initial_map && dso->kernel == DSO_TYPE_GUEST_KERNEL && machine__is_default_guest(machine)) { /* @@ -788,18 +769,16 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) * symbols are in its kmap. Mark it as * loaded. */ - dso__set_loaded(curr_map->dso, - curr_map->type); + dso__set_loaded(curr_map->dso); } - curr_map = map_groups__find_by_name(kmaps, - map->type, module); + curr_map = map_groups__find_by_name(kmaps, module); if (curr_map == NULL) { pr_debug("%s/proc/{kallsyms,modules} " "inconsistency while looking " "for \"%s\" module!\n", machine->root_dir, module); - curr_map = map; + curr_map = initial_map; goto discard_symbol; } @@ -809,11 +788,21 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) } /* * So that we look just like we get from .ko files, - * i.e. not prelinked, relative to map->start. + * i.e. not prelinked, relative to initial_map->start. */ pos->start = curr_map->map_ip(curr_map, pos->start); pos->end = curr_map->map_ip(curr_map, pos->end); - } else if (curr_map != map) { + } else if (x86_64 && is_entry_trampoline(pos->name)) { + /* + * These symbols are not needed anymore since the + * trampoline maps refer to the text section and it's + * symbols instead. Avoid having to deal with + * relocations, and the assumption that the first symbol + * is the start of kernel text, by simply removing the + * symbols at this point. + */ + goto discard_symbol; + } else if (curr_map != initial_map) { char dso_name[PATH_MAX]; struct dso *ndso; @@ -824,7 +813,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) } if (count == 0) { - curr_map = map; + curr_map = initial_map; goto add_symbol; } @@ -843,7 +832,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) ndso->kernel = dso->kernel; - curr_map = map__new2(pos->start, ndso, map->type); + curr_map = map__new2(pos->start, ndso); if (curr_map == NULL) { dso__put(ndso); return -1; @@ -858,9 +847,9 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) pos->end -= delta; } add_symbol: - if (curr_map != map) { + if (curr_map != initial_map) { rb_erase(&pos->rb_node, root); - symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + symbols__insert(&curr_map->dso->symbols, pos); ++moved; } else ++count; @@ -871,10 +860,10 @@ discard_symbol: symbol__delete(pos); } - if (curr_map != map && + if (curr_map != initial_map && dso->kernel == DSO_TYPE_GUEST_KERNEL && machine__is_default_guest(kmaps->machine)) { - dso__set_loaded(curr_map->dso, curr_map->type); + dso__set_loaded(curr_map->dso); } return count + moved; @@ -1035,7 +1024,12 @@ out_delete_from: return ret; } -static int do_validate_kcore_modules(const char *filename, struct map *map, +struct map *map_groups__first(struct map_groups *mg) +{ + return maps__first(&mg->maps); +} + +static int do_validate_kcore_modules(const char *filename, struct map_groups *kmaps) { struct rb_root modules = RB_ROOT; @@ -1046,13 +1040,12 @@ static int do_validate_kcore_modules(const char *filename, struct map *map, if (err) return err; - old_map = map_groups__first(kmaps, map->type); + old_map = map_groups__first(kmaps); while (old_map) { struct map *next = map_groups__next(old_map); struct module_info *mi; - if (old_map == map || old_map->start == map->start) { - /* The kernel map */ + if (!__map__is_kmodule(old_map)) { old_map = next; continue; } @@ -1109,7 +1102,7 @@ static int validate_kcore_modules(const char *kallsyms_filename, kallsyms_filename)) return -EINVAL; - if (do_validate_kcore_modules(modules_filename, map, kmaps)) + if (do_validate_kcore_modules(modules_filename, kmaps)) return -EINVAL; return 0; @@ -1138,7 +1131,6 @@ static int validate_kcore_addresses(const char *kallsyms_filename, struct kcore_mapfn_data { struct dso *dso; - enum map_type type; struct list_head maps; }; @@ -1147,7 +1139,7 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) struct kcore_mapfn_data *md = data; struct map *map; - map = map__new2(start, md->dso, md->type); + map = map__new2(start, md->dso); if (map == NULL) return -ENOMEM; @@ -1163,13 +1155,13 @@ static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { struct map_groups *kmaps = map__kmaps(map); - struct machine *machine; struct kcore_mapfn_data md; struct map *old_map, *new_map, *replacement_map = NULL; + struct machine *machine; bool is_64_bit; int err, fd; char kcore_filename[PATH_MAX]; - struct symbol *sym; + u64 stext; if (!kmaps) return -EINVAL; @@ -1177,7 +1169,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, machine = kmaps->machine; /* This function requires that the map is the kernel map */ - if (map != machine->vmlinux_maps[map->type]) + if (!__map__is_kernel(map)) return -EINVAL; if (!filename_from_kallsyms_filename(kcore_filename, "kcore", @@ -1189,7 +1181,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map, return -EINVAL; md.dso = dso; - md.type = map->type; INIT_LIST_HEAD(&md.maps); fd = open(kcore_filename, O_RDONLY); @@ -1200,7 +1191,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Read new maps into temporary lists */ - err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, + err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md, &is_64_bit); if (err) goto out_err; @@ -1212,7 +1203,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - old_map = map_groups__first(kmaps, map->type); + old_map = map_groups__first(kmaps); while (old_map) { struct map *next = map_groups__next(old_map); @@ -1220,14 +1211,15 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map_groups__remove(kmaps, old_map); old_map = next; } + machine->trampolines_mapped = false; - /* Find the kernel map using the first symbol */ - sym = dso__first_symbol(dso, map->type); - list_for_each_entry(new_map, &md.maps, node) { - if (sym && sym->start >= new_map->start && - sym->start < new_map->end) { - replacement_map = new_map; - break; + /* Find the kernel map using the '_stext' symbol */ + if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) { + list_for_each_entry(new_map, &md.maps, node) { + if (stext >= new_map->start && stext < new_map->end) { + replacement_map = new_map; + break; + } } } @@ -1256,6 +1248,19 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map__put(new_map); } + if (machine__is(machine, "x86_64")) { + u64 addr; + + /* + * If one of the corresponding symbols is there, assume the + * entry trampoline maps are too. + */ + if (!kallsyms__get_function_start(kallsyms_filename, + ENTRY_TRAMPOLINE_NAME, + &addr)) + machine->trampolines_mapped = true; + } + /* * Set the data type and long name so that kcore can be read via * dso__data_read_addr(). @@ -1268,7 +1273,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, close(fd); - if (map->type == MAP__FUNCTION) + if (map->prot & PROT_EXEC) pr_debug("Using %s for kernel object code\n", kcore_filename); else pr_debug("Using %s for kernel data\n", kcore_filename); @@ -1289,14 +1294,10 @@ out_err: * If the kernel is relocated at boot time, kallsyms won't match. Compute the * delta based on the relocation reference symbol. */ -static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) +static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta) { - struct kmap *kmap = map__kmap(map); u64 addr; - if (!kmap) - return -1; - if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) return 0; @@ -1310,19 +1311,23 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, bool no_kcore) { + struct kmap *kmap = map__kmap(map); u64 delta = 0; if (symbol__restricted_filename(filename, "/proc/kallsyms")) return -1; - if (dso__load_all_kallsyms(dso, filename, map) < 0) + if (!kmap || !kmap->kmaps) return -1; - if (kallsyms__delta(map, filename, &delta)) + if (dso__load_all_kallsyms(dso, filename) < 0) return -1; - symbols__fixup_end(&dso->symbols[map->type]); - symbols__fixup_duplicate(&dso->symbols[map->type]); + if (kallsyms__delta(kmap, filename, &delta)) + return -1; + + symbols__fixup_end(&dso->symbols); + symbols__fixup_duplicate(&dso->symbols); if (dso->kernel == DSO_TYPE_GUEST_KERNEL) dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; @@ -1330,9 +1335,9 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; if (!no_kcore && !dso__load_kcore(dso, map, filename)) - return dso__split_kallsyms_for_kcore(dso, map); + return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso); else - return dso__split_kallsyms(dso, map, delta); + return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map); } int dso__load_kallsyms(struct dso *dso, const char *filename, @@ -1341,8 +1346,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, return __dso__load_kallsyms(dso, filename, map, false); } -static int dso__load_perf_map(const char *map_path, struct dso *dso, - struct map *map) +static int dso__load_perf_map(const char *map_path, struct dso *dso) { char *line = NULL; size_t n; @@ -1379,12 +1383,12 @@ static int dso__load_perf_map(const char *map_path, struct dso *dso, if (len + 2 >= line_len) continue; - sym = symbol__new(start, size, STB_GLOBAL, line + len); + sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len); if (sym == NULL) goto out_delete_line; - symbols__insert(&dso->symbols[map->type], sym); + symbols__insert(&dso->symbols, sym); nr_syms++; } @@ -1509,25 +1513,27 @@ int dso__load(struct dso *dso, struct map *map) pthread_mutex_lock(&dso->lock); /* check again under the dso->lock */ - if (dso__loaded(dso, map->type)) { + if (dso__loaded(dso)) { ret = 1; goto out; } + if (map->groups && map->groups->machine) + machine = map->groups->machine; + else + machine = NULL; + if (dso->kernel) { if (dso->kernel == DSO_TYPE_KERNEL) ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) ret = dso__load_guest_kernel_sym(dso, map); + if (machine__is(machine, "x86_64")) + machine__map_x86_64_entry_trampolines(machine, dso); goto out; } - if (map->groups && map->groups->machine) - machine = map->groups->machine; - else - machine = NULL; - dso->adjust_symbols = 0; if (perfmap) { @@ -1542,7 +1548,7 @@ int dso__load(struct dso *dso, struct map *map) goto out; } - ret = dso__load_perf_map(map_path, dso, map); + ret = dso__load_perf_map(map_path, dso); dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : DSO_BINARY_TYPE__NOT_FOUND; goto out; @@ -1651,7 +1657,7 @@ int dso__load(struct dso *dso, struct map *map) if (ret > 0) { int nr_plt; - nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map); + nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss); if (nr_plt > 0) ret += nr_plt; } @@ -1663,17 +1669,16 @@ out_free: if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) ret = 0; out: - dso__set_loaded(dso, map->type); + dso__set_loaded(dso); pthread_mutex_unlock(&dso->lock); nsinfo__mountns_exit(&nsc); return ret; } -struct map *map_groups__find_by_name(struct map_groups *mg, - enum map_type type, const char *name) +struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) { - struct maps *maps = &mg->maps[type]; + struct maps *maps = &mg->maps; struct map *map; down_read(&maps->lock); @@ -1720,7 +1725,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, else dso->binary_type = DSO_BINARY_TYPE__VMLINUX; dso__set_long_name(dso, vmlinux, vmlinux_allocated); - dso__set_loaded(dso, map->type); + dso__set_loaded(dso); pr_debug("Using %s for symbols\n", symfs_vmlinux); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 70c16741f50a..f25fae4b5743 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -57,7 +57,8 @@ struct symbol { u64 start; u64 end; u16 namelen; - u8 binding; + u8 type:4; + u8 binding:4; u8 idle:1; u8 ignore:1; u8 inlined:1; @@ -89,7 +90,6 @@ struct intlist; struct symbol_conf { unsigned short priv_size; - unsigned short nr_events; bool try_vmlinux_path, init_annotation, force, @@ -108,8 +108,6 @@ struct symbol_conf { show_cpu_utilization, initialized, kptr_restrict, - annotate_asm_raw, - annotate_src, event_group, demangle, demangle_kernel, @@ -259,17 +257,16 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, bool no_kcore); int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); -void dso__insert_symbol(struct dso *dso, enum map_type type, +void dso__insert_symbol(struct dso *dso, struct symbol *sym); -struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, - u64 addr); -struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, - const char *name); +struct symbol *dso__find_symbol(struct dso *dso, u64 addr); +struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); + struct symbol *symbol__next_by_name(struct symbol *sym); -struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); -struct symbol *dso__last_symbol(struct dso *dso, enum map_type type); +struct symbol *dso__first_symbol(struct dso *dso); +struct symbol *dso__last_symbol(struct dso *dso); struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); @@ -288,7 +285,7 @@ void symbol__exit(void); void symbol__elf_init(void); int symbol__annotation_init(void); -struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); +struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name); size_t __symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, bool unknown_as_addr, @@ -300,7 +297,6 @@ size_t __symbol__fprintf_symname(const struct symbol *sym, bool unknown_as_addr, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); size_t symbol__fprintf(struct symbol *sym, FILE *fp); -bool symbol_type__is_a(char symbol_type, enum map_type map_type); bool symbol__restricted_filename(const char *filename, const char *restricted_filename); int symbol__config_symfs(const struct option *opt __maybe_unused, @@ -308,8 +304,7 @@ int symbol__config_symfs(const struct option *opt __maybe_unused, int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule); -int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, - struct map *map); +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss); char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); @@ -317,7 +312,7 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) void symbols__insert(struct rb_root *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root *symbols); void symbols__fixup_end(struct rb_root *symbols); -void __map_groups__fixup_end(struct map_groups *mg, enum map_type type); +void map_groups__fixup_end(struct map_groups *mg); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 6dd2cb88ccbe..ed0205cc7942 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -58,13 +58,13 @@ size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) } size_t dso__fprintf_symbols_by_name(struct dso *dso, - enum map_type type, FILE *fp) + FILE *fp) { size_t ret = 0; struct rb_node *nd; struct symbol_name_rb_node *pos; - for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); fprintf(fp, "%s\n", pos->sym.name); } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 68b65b10579b..2048d393ece6 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -302,23 +302,20 @@ int thread__insert_map(struct thread *thread, struct map *map) static int __thread__prepare_access(struct thread *thread) { bool initialized = false; - int i, err = 0; - - for (i = 0; i < MAP__NR_TYPES; ++i) { - struct maps *maps = &thread->mg->maps[i]; - struct map *map; + int err = 0; + struct maps *maps = &thread->mg->maps; + struct map *map; - down_read(&maps->lock); + down_read(&maps->lock); - for (map = maps__first(maps); map; map = map__next(map)) { - err = unwind__prepare_access(thread, map, &initialized); - if (err || initialized) - break; - } - - up_read(&maps->lock); + for (map = maps__first(maps); map; map = map__next(map)) { + err = unwind__prepare_access(thread, map, &initialized); + if (err || initialized) + break; } + up_read(&maps->lock); + return err; } @@ -335,8 +332,6 @@ static int thread__prepare_access(struct thread *thread) static int thread__clone_map_groups(struct thread *thread, struct thread *parent) { - int i; - /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) return thread__prepare_access(thread); @@ -348,9 +343,8 @@ static int thread__clone_map_groups(struct thread *thread, } /* But this one is new process, copy maps. */ - for (i = 0; i < MAP__NR_TYPES; ++i) - if (map_groups__clone(thread, parent->mg, i) < 0) - return -ENOMEM; + if (map_groups__clone(thread, parent->mg) < 0) + return -ENOMEM; return 0; } @@ -371,8 +365,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) return thread__clone_map_groups(thread, parent); } -void thread__find_cpumode_addr_location(struct thread *thread, - enum map_type type, u64 addr, +void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, struct addr_location *al) { size_t i; @@ -384,7 +377,7 @@ void thread__find_cpumode_addr_location(struct thread *thread, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_addr_location(thread, cpumodes[i], type, addr, al); + thread__find_symbol(thread, cpumodes[i], addr, al); if (al->map) break; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 14d44c3235b8..07606aa6998d 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -92,16 +92,13 @@ size_t thread__fprintf(struct thread *thread, FILE *fp); struct thread *thread__main_thread(struct machine *machine, struct thread *thread); -void thread__find_addr_map(struct thread *thread, - u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al); +struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al); -void thread__find_addr_location(struct thread *thread, - u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al); +struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al); -void thread__find_cpumode_addr_location(struct thread *thread, - enum map_type type, u64 addr, +void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, struct addr_location *al); static inline void *thread__priv(struct thread *thread) diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 9892323cdd7c..9add1f72ce95 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -3,6 +3,7 @@ #define __PERF_TOP_H 1 #include "tool.h" +#include "annotate.h" #include <linux/types.h> #include <stddef.h> #include <stdbool.h> @@ -16,6 +17,7 @@ struct perf_top { struct perf_tool tool; struct perf_evlist *evlist; struct record_opts record_opts; + struct annotation_options annotation_opts; /* * Symbols will be added here in perf_event__process_sample and will * get out after decayed. @@ -35,7 +37,6 @@ struct perf_top { struct perf_session *session; struct winsize winsize; int realtime_prio; - int sym_pcnt_filter; const char *sym_filter; float min_percent; unsigned int nr_threads_synthesize; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index d7f2113462fb..c85d0d1a65ed 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -103,11 +103,10 @@ out: static int record_header_files(void) { - char *path; + char *path = get_events_file("header_page"); struct stat st; int err = -EIO; - path = get_tracing_file("events/header_page"); if (!path) { pr_debug("can't get tracing/events/header_page"); return -ENOMEM; @@ -128,9 +127,9 @@ static int record_header_files(void) goto out; } - put_tracing_file(path); + put_events_file(path); - path = get_tracing_file("events/header_event"); + path = get_events_file("header_event"); if (!path) { pr_debug("can't get tracing/events/header_event"); err = -ENOMEM; @@ -154,7 +153,7 @@ static int record_header_files(void) err = 0; out: - put_tracing_file(path); + put_events_file(path); return err; } @@ -243,7 +242,7 @@ static int record_ftrace_files(struct tracepoint_path *tps) char *path; int ret; - path = get_tracing_file("events/ftrace"); + path = get_events_file("ftrace"); if (!path) { pr_debug("can't get tracing/events/ftrace"); return -ENOMEM; diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 16a776371d03..1aa368603268 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -75,6 +75,7 @@ void trace_event__cleanup(struct trace_event *t) static struct event_format* tp_format(const char *sys, const char *name) { + char *tp_dir = get_events_file(sys); struct pevent *pevent = tevent.pevent; struct event_format *event = NULL; char path[PATH_MAX]; @@ -82,8 +83,11 @@ tp_format(const char *sys, const char *name) char *data; int err; - scnprintf(path, PATH_MAX, "%s/%s/%s/format", - tracing_events_path, sys, name); + if (!tp_dir) + return ERR_PTR(-errno); + + scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name); + put_events_file(tp_dir); err = filename__read_str(path, &data, &size); if (err) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 7bdd239c795c..538db4e5d1e6 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -28,10 +28,11 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; - - thread__find_addr_location(ui->thread, - PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, al); + /* + * Some callers will use al->sym, so we can't just use the + * cheaper thread__find_map() here. + */ + thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al); if (al->map) dso = al->map->dso; @@ -103,19 +104,7 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr, struct addr_location al; ssize_t size; - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, addr, &al); - if (!al.map) { - /* - * We've seen cases (softice) where DWARF unwinder went - * through non executable mmaps, which we need to lookup - * in MAP__VARIABLE tree. - */ - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__VARIABLE, addr, &al); - } - - if (!al.map) { + if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); return -1; } diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index af873044d33a..6a11bc7e6b27 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -366,19 +366,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, static struct map *find_map(unw_word_t ip, struct unwind_info *ui) { struct addr_location al; - - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - if (!al.map) { - /* - * We've seen cases (softice) where DWARF unwinder went - * through non executable mmaps, which we need to lookup - * in MAP__VARIABLE tree. - */ - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__VARIABLE, ip, &al); - } - return al.map; + return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al); } static int @@ -586,12 +574,9 @@ static int entry(u64 ip, struct thread *thread, struct unwind_entry e; struct addr_location al; - thread__find_addr_location(thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - + e.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al); e.ip = al.addr; e.map = al.map; - e.sym = al.sym; pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", al.sym ? al.sym->name : "''", diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 1019bbc5dbd8..eac5b858a371 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -38,11 +38,43 @@ void perf_set_multithreaded(void) } unsigned int page_size; -int cacheline_size; + +#ifdef _SC_LEVEL1_DCACHE_LINESIZE +#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE) +#else +static void cache_line_size(int *cacheline_sizep) +{ + if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep)) + pr_debug("cannot determine cache line size"); +} +#endif + +int cacheline_size(void) +{ + static int size; + + if (!size) + cache_line_size(&size); + + return size; +} int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; +int sysctl__max_stack(void) +{ + int value; + + if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) + sysctl_perf_event_max_stack = value; + + if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0) + sysctl_perf_event_max_contexts_per_stack = value; + + return sysctl_perf_event_max_stack; +} + bool test_attr__enabled; bool perf_host = true; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c9626c206208..dc58254a2b69 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -43,7 +43,9 @@ size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); extern unsigned int page_size; -extern int cacheline_size; +int __pure cacheline_size(void); + +int sysctl__max_stack(void); int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 0acb1ec0e2f0..741af209b19d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -139,12 +139,10 @@ static enum dso_type machine__thread_dso_type(struct machine *machine, struct thread *thread) { enum dso_type dso_type = DSO__TYPE_UNKNOWN; - struct map *map; - struct dso *dso; + struct map *map = map_groups__first(thread->mg); - map = map_groups__first(thread->mg, MAP__FUNCTION); for (; map ; map = map_groups__next(map)) { - dso = map->dso; + struct dso *dso = map->dso; if (!dso || dso->long_name[0] != '/') continue; dso_type = dso__type(dso, machine); diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index 9b65f052081f..9ba8a44ad2a7 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -104,7 +104,7 @@ FILE *prepare_output(const char *dirname) dirname, time(NULL)); } - dprintf("logilename: %s\n", filename); + dprintf("logfilename: %s\n", filename); output = fopen(filename, "w+"); if (output == NULL) { diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 5b3205f16217..5b8c4956ff9a 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -126,6 +126,20 @@ void fix_up_intel_idle_driver_name(char *tmp, int num) } } +#ifdef __powerpc__ +void map_power_idle_state_name(char *tmp) +{ + if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN)) + strcpy(tmp, "stop0L"); + else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN)) + strcpy(tmp, "stop1L"); + else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN)) + strcpy(tmp, "stop2L"); +} +#else +void map_power_idle_state_name(char *tmp) { } +#endif + static struct cpuidle_monitor *cpuidle_register(void) { int num; @@ -145,6 +159,7 @@ static struct cpuidle_monitor *cpuidle_register(void) if (tmp == NULL) continue; + map_power_idle_state_name(tmp); fix_up_intel_idle_driver_name(tmp, num); strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 05f953f0f0a0..051da0a7c454 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -70,36 +70,43 @@ void print_n_spaces(int n) printf(" "); } -/* size of s must be at least n + 1 */ +/*s is filled with left and right spaces + *to make its length atleast n+1 + */ int fill_string_with_spaces(char *s, int n) { + char *temp; int len = strlen(s); - if (len > n) + + if (len >= n) return -1; + + temp = malloc(sizeof(char) * (n+1)); for (; len < n; len++) s[len] = ' '; s[len] = '\0'; + snprintf(temp, n+1, " %s", s); + strcpy(s, temp); + free(temp); return 0; } +#define MAX_COL_WIDTH 6 void print_header(int topology_depth) { int unsigned mon; int state, need_len; cstate_t s; char buf[128] = ""; - int percent_width = 4; fill_string_with_spaces(buf, topology_depth * 5 - 1); printf("%s|", buf); for (mon = 0; mon < avail_monitors; mon++) { - need_len = monitors[mon]->hw_states_num * (percent_width + 3) + need_len = monitors[mon]->hw_states_num * (MAX_COL_WIDTH + 1) - 1; - if (mon != 0) { - printf("|| "); - need_len--; - } + if (mon != 0) + printf("||"); sprintf(buf, "%s", monitors[mon]->name); fill_string_with_spaces(buf, need_len); printf("%s", buf); @@ -107,23 +114,21 @@ void print_header(int topology_depth) printf("\n"); if (topology_depth > 2) - printf("PKG |"); + printf(" PKG|"); if (topology_depth > 1) printf("CORE|"); if (topology_depth > 0) - printf("CPU |"); + printf(" CPU|"); for (mon = 0; mon < avail_monitors; mon++) { if (mon != 0) - printf("|| "); - else - printf(" "); + printf("||"); for (state = 0; state < monitors[mon]->hw_states_num; state++) { if (state != 0) - printf(" | "); + printf("|"); s = monitors[mon]->hw_states[state]; sprintf(buf, "%s", s.name); - fill_string_with_spaces(buf, percent_width); + fill_string_with_spaces(buf, MAX_COL_WIDTH); printf("%s", buf); } printf(" "); diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 9e43f3371fbc..2ae50b499e0a 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -15,7 +15,16 @@ #define MONITORS_MAX 20 #define MONITOR_NAME_LEN 20 + +/* CSTATE_NAME_LEN is limited by header field width defined + * in cpupower-monitor.c. Header field width is defined to be + * sum of percent width and two spaces for padding. + */ +#ifdef __powerpc__ +#define CSTATE_NAME_LEN 7 +#else #define CSTATE_NAME_LEN 5 +#endif #define CSTATE_DESC_LEN 60 int cpu_count; diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index abb4c38f029b..8ee626c0f6a5 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2 # # Tool for analyzing boot timing # Copyright (c) 2013, Intel Corporation. diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg index 4f80ad7d7275..f8fcb06fd68b 100644 --- a/tools/power/pm-graph/config/custom-timeline-functions.cfg +++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg @@ -105,7 +105,7 @@ override-dev-timeline-functions: true # example: [color=#CC00CC] # # arglist: A list of arguments from registers/stack addresses. See URL: -# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt +# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst # # example: cpu=%di:s32 # @@ -170,7 +170,7 @@ pm_restore_console: # example: [color=#CC00CC] # # arglist: A list of arguments from registers/stack addresses. See URL: -# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt +# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst # # example: port=+36(%di):s32 # diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 18baaf6300c9..070be2cf7f74 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -168,6 +168,7 @@ Create a summary page of all tests in \fIindir\fR. Creates summary.html in the current folder. The output page is a table of tests with suspend and resume values sorted by suspend mode, host, and kernel. Includes test averages by mode and links to the test html files. +Use -genhtml to include tests with missing html. .TP \fB-modes\fR List available suspend modes. @@ -179,6 +180,9 @@ with any options you intend to use to see if they will work. \fB-fpdt\fR Print out the contents of the ACPI Firmware Performance Data Table. .TP +\fB-battery\fR +Print out battery status and current charge. +.TP \fB-sysinfo\fR Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode. .TP diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 266409fb27ae..0c760478f7d7 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2 # # Tool for analyzing suspend/resume timing # Copyright (c) 2013, Intel Corporation. @@ -69,7 +69,7 @@ from subprocess import call, Popen, PIPE # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.0' + version = '5.1' ansi = False rs = 0 display = 0 @@ -240,7 +240,7 @@ class SystemValues: kprobes = dict() timeformat = '%.3f' cmdline = '%s %s' % \ - (os.path.basename(sys.argv[0]), string.join(sys.argv[1:], ' ')) + (os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:])) def __init__(self): self.archargs = 'args_'+platform.machine() self.hostname = platform.node() @@ -917,12 +917,18 @@ class Data: self.devicegroups.append([phase]) self.errorinfo = {'suspend':[],'resume':[]} def extractErrorInfo(self): + elist = { + 'HWERROR' : '.*\[ *Hardware Error *\].*', + 'FWBUG' : '.*\[ *Firmware Bug *\].*', + 'BUG' : '.*BUG.*', + 'ERROR' : '.*ERROR.*', + 'WARNING' : '.*WARNING.*', + 'IRQ' : '.*genirq: .*', + 'TASKFAIL': '.*Freezing of tasks failed.*', + } lf = sysvals.openlog(sysvals.dmesgfile, 'r') i = 0 list = [] - # sl = start line, et = error time, el = error line - type = 'ERROR' - sl = et = el = -1 for line in lf: i += 1 m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) @@ -931,43 +937,13 @@ class Data: t = float(m.group('ktime')) if t < self.start or t > self.end: continue - if t < self.tSuspended: - dir = 'suspend' - else: - dir = 'resume' + dir = 'suspend' if t < self.tSuspended else 'resume' msg = m.group('msg') - if re.match('-*\[ *cut here *\]-*', msg): - type = 'WARNING' - sl = i - elif re.match('genirq: .*', msg): - type = 'IRQ' - sl = i - elif re.match('BUG: .*', msg) or re.match('kernel BUG .*', msg): - type = 'BUG' - sl = i - elif re.match('-*\[ *end trace .*\]-*', msg) or \ - re.match('R13: .*', msg): - if et >= 0 and sl >= 0: - list.append((type, dir, et, sl, i)) - self.kerror = True - sl = et = el = -1 - type = 'ERROR' - elif 'Call Trace:' in msg: - if el >= 0 and et >= 0: - list.append((type, dir, et, el, el)) + for err in elist: + if re.match(elist[err], msg): + list.append((err, dir, t, i, i)) self.kerror = True - et, el = t, i - if sl < 0 or type == 'BUG': - slval = i - if sl >= 0: - slval = sl - list.append((type, dir, et, slval, i)) - self.kerror = True - sl = et = el = -1 - type = 'ERROR' - if el >= 0 and et >= 0: - list.append((type, dir, et, el, el)) - self.kerror = True + break for e in list: type, dir, t, idx1, idx2 = e sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t)) @@ -2331,12 +2307,14 @@ class TestProps: sv.suspendmode = data.stamp['mode'] if sv.suspendmode == 'command' and sv.ftracefile != '': modes = ['on', 'freeze', 'standby', 'mem', 'disk'] - out = Popen(['grep', 'machine_suspend', sv.ftracefile], - stderr=PIPE, stdout=PIPE).stdout.read() - m = re.match('.* machine_suspend\[(?P<mode>.*)\]', out) - if m and m.group('mode') in ['1', '2', '3', '4']: - sv.suspendmode = modes[int(m.group('mode'))] - data.stamp['mode'] = sv.suspendmode + fp = sysvals.openlog(sv.ftracefile, 'r') + for line in fp: + m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line) + if m and m.group('mode') in ['1', '2', '3', '4']: + sv.suspendmode = modes[int(m.group('mode'))] + data.stamp['mode'] = sv.suspendmode + break + fp.close() m = re.match(self.cmdlinefmt, self.cmdline) if m: sv.cmdline = m.group('cmd') @@ -2413,7 +2391,7 @@ class ProcessMonitor: # markers, and/or kprobes required for primary parsing. def doesTraceLogHaveTraceEvents(): kpcheck = ['_cal: (', '_cpu_down()'] - techeck = sysvals.traceevents[:] + techeck = ['suspend_resume'] tmcheck = ['SUSPEND START', 'RESUME COMPLETE'] sysvals.usekprobes = False fp = sysvals.openlog(sysvals.ftracefile, 'r') @@ -2808,7 +2786,7 @@ def parseTraceLog(live=False): # -- phase changes -- # start of kernel suspend if(re.match('suspend_enter\[.*', t.name)): - if(isbegin): + if(isbegin and data.start == data.tKernSus): data.dmesg[phase]['start'] = t.time data.tKernSus = t.time continue @@ -3072,13 +3050,20 @@ def parseTraceLog(live=False): sysvals.vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name)) cg.newActionFromFunction(data) if sysvals.suspendmode == 'command': - return testdata + return (testdata, '') # fill in any missing phases + error = [] for data in testdata: + tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1)) + terr = '' lp = data.phases[0] for p in data.phases: if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0): + if not terr: + print 'TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp) + terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp) + error.append(terr) sysvals.vprint('WARNING: phase "%s" is missing!' % p) if(data.dmesg[p]['start'] < 0): data.dmesg[p]['start'] = data.dmesg[lp]['end'] @@ -3106,7 +3091,7 @@ def parseTraceLog(live=False): for j in range(i + 1, tc): testdata[j].mergeOverlapDevices(devlist) testdata[0].stitchTouchingThreads(testdata[1:]) - return testdata + return (testdata, ', '.join(error)) # Function: loadKernelLog # Description: @@ -3173,7 +3158,7 @@ def loadKernelLog(): if data: testruns.append(data) if len(testruns) < 1: - doError(' dmesg log has no suspend/resume data: %s' \ + print('ERROR: dmesg log has no suspend/resume data: %s' \ % sysvals.dmesgfile) # fix lines with same timestamp/function with the call and return swapped @@ -3521,68 +3506,144 @@ def createHTMLSummarySimple(testruns, htmlfile, folder): .summary {border:1px solid;}\n\ th {border: 1px solid black;background:#222;color:white;}\n\ td {font: 16px "Times New Roman";text-align: center;}\n\ - tr.alt td {background:#ddd;}\n\ - tr.avg td {background:#aaa;}\n\ + tr.head td {border: 1px solid black;background:#aaa;}\n\ + tr.alt {background-color:#ddd;}\n\ + tr.notice {color:red;}\n\ + .minval {background-color:#BBFFBB;}\n\ + .medval {background-color:#BBBBFF;}\n\ + .maxval {background-color:#FFBBBB;}\n\ + .head a {color:#000;text-decoration: none;}\n\ </style>\n</head>\n<body>\n' + # extract the test data into list + list = dict() + tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []] + iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] + num = 0 + lastmode = '' + cnt = {'pass':0, 'fail':0, 'hang':0} + for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])): + mode = data['mode'] + if mode not in list: + list[mode] = {'data': [], 'avg': [0,0], 'min': [0,0], 'max': [0,0], 'med': [0,0]} + if lastmode and lastmode != mode and num > 0: + for i in range(2): + s = sorted(tMed[i]) + list[lastmode]['med'][i] = s[int(len(s)/2)] + iMed[i] = tMed[i].index(list[lastmode]['med'][i]) + list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] + list[lastmode]['min'] = tMin + list[lastmode]['max'] = tMax + list[lastmode]['idx'] = (iMin, iMed, iMax) + tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []] + iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] + num = 0 + tVal = [float(data['suspend']), float(data['resume'])] + list[mode]['data'].append([data['host'], data['kernel'], + data['time'], tVal[0], tVal[1], data['url'], data['result'], + data['issues']]) + idx = len(list[mode]['data']) - 1 + if data['result'] == 'pass': + cnt['pass'] += 1 + for i in range(2): + tMed[i].append(tVal[i]) + tAvg[i] += tVal[i] + if tMin[i] == 0 or tVal[i] < tMin[i]: + iMin[i] = idx + tMin[i] = tVal[i] + if tMax[i] == 0 or tVal[i] > tMax[i]: + iMax[i] = idx + tMax[i] = tVal[i] + num += 1 + elif data['result'] == 'hang': + cnt['hang'] += 1 + elif data['result'] == 'fail': + cnt['fail'] += 1 + lastmode = mode + if lastmode and num > 0: + for i in range(2): + s = sorted(tMed[i]) + list[lastmode]['med'][i] = s[int(len(s)/2)] + iMed[i] = tMed[i].index(list[lastmode]['med'][i]) + list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] + list[lastmode]['min'] = tMin + list[lastmode]['max'] = tMax + list[lastmode]['idx'] = (iMin, iMed, iMax) + # group test header - html += '<div class="stamp">%s (%d tests)</div>\n' % (folder, len(testruns)) + desc = [] + for ilk in sorted(cnt, reverse=True): + if cnt[ilk] > 0: + desc.append('%d %s' % (cnt[ilk], ilk)) + html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (folder, len(testruns), ', '.join(desc)) th = '\t<th>{0}</th>\n' td = '\t<td>{0}</td>\n' + tdh = '\t<td{1}>{0}</td>\n' tdlink = '\t<td><a href="{0}">html</a></td>\n' # table header html += '<table class="summary">\n<tr>\n' + th.format('#') +\ th.format('Mode') + th.format('Host') + th.format('Kernel') +\ - th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\ - th.format('Detail') + '</tr>\n' - - # test data, 1 row per test - avg = '<tr class="avg"><td></td><td></td><td></td><td></td>'+\ - '<td>Average of {0} {1} tests</td><td>{2}</td><td>{3}</td><td></td></tr>\n' - sTimeAvg = rTimeAvg = 0.0 - mode = '' - num = 0 - for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])): - if mode != data['mode']: - # test average line - if(num > 0): - sTimeAvg /= (num - 1) - rTimeAvg /= (num - 1) - html += avg.format('%d' % (num - 1), mode, - '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) - sTimeAvg = rTimeAvg = 0.0 - mode = data['mode'] - num = 1 - # alternate row color - if num % 2 == 1: - html += '<tr class="alt">\n' + th.format('Test Time') + th.format('Result') + th.format('Issues') +\ + th.format('Suspend') + th.format('Resume') + th.format('Detail') + '</tr>\n' + + # export list into html + head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\ + '<td colspan=8 class="sus">Suspend Avg={2} '+\ + '<span class=minval><a href="#s{10}min">Min={3}</a></span> '+\ + '<span class=medval><a href="#s{10}med">Med={4}</a></span> '+\ + '<span class=maxval><a href="#s{10}max">Max={5}</a></span> '+\ + 'Resume Avg={6} '+\ + '<span class=minval><a href="#r{10}min">Min={7}</a></span> '+\ + '<span class=medval><a href="#r{10}med">Med={8}</a></span> '+\ + '<span class=maxval><a href="#r{10}max">Max={9}</a></span></td>'+\ + '</tr>\n' + headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=8></td></tr>\n' + for mode in list: + # header line for each suspend mode + num = 0 + tAvg, tMin, tMax, tMed = list[mode]['avg'], list[mode]['min'],\ + list[mode]['max'], list[mode]['med'] + count = len(list[mode]['data']) + if 'idx' in list[mode]: + iMin, iMed, iMax = list[mode]['idx'] + html += head.format('%d' % count, mode.upper(), + '%.3f' % tAvg[0], '%.3f' % tMin[0], '%.3f' % tMed[0], '%.3f' % tMax[0], + '%.3f' % tAvg[1], '%.3f' % tMin[1], '%.3f' % tMed[1], '%.3f' % tMax[1], + mode.lower() + ) else: - html += '<tr>\n' - html += td.format("%d" % num) - num += 1 - # basic info - for item in ['mode', 'host', 'kernel', 'time']: - val = "unknown" - if(item in data): - val = data[item] - html += td.format(val) - # suspend time - sTime = float(data['suspend']) - sTimeAvg += sTime - html += td.format('%.3f ms' % sTime) - # resume time - rTime = float(data['resume']) - rTimeAvg += rTime - html += td.format('%.3f ms' % rTime) - # link to the output html - html += tdlink.format(data['url']) + '</tr>\n' - # last test average line - if(num > 0): - sTimeAvg /= (num - 1) - rTimeAvg /= (num - 1) - html += avg.format('%d' % (num - 1), mode, - '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) + iMin = iMed = iMax = [-1, -1, -1] + html += headnone.format('%d' % count, mode.upper()) + for d in list[mode]['data']: + # row classes - alternate row color + rcls = ['alt'] if num % 2 == 1 else [] + if d[6] != 'pass': + rcls.append('notice') + html += '<tr class="'+(' '.join(rcls))+'">\n' if len(rcls) > 0 else '<tr>\n' + # figure out if the line has sus or res highlighted + idx = list[mode]['data'].index(d) + tHigh = ['', ''] + for i in range(2): + tag = 's%s' % mode if i == 0 else 'r%s' % mode + if idx == iMin[i]: + tHigh[i] = ' id="%smin" class=minval title="Minimum"' % tag + elif idx == iMax[i]: + tHigh[i] = ' id="%smax" class=maxval title="Maximum"' % tag + elif idx == iMed[i]: + tHigh[i] = ' id="%smed" class=medval title="Median"' % tag + html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row + html += td.format(mode) # mode + html += td.format(d[0]) # host + html += td.format(d[1]) # kernel + html += td.format(d[2]) # time + html += td.format(d[6]) # result + html += td.format(d[7]) # issues + html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('') # suspend + html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('') # resume + html += tdlink.format(d[5]) if d[5] else td.format('') # url + html += '</tr>\n' + num += 1 # flush the data to file hf = open(htmlfile, 'w') @@ -3607,7 +3668,7 @@ def ordinal(value): # testruns: array of Data objects from parseKernelLog or parseTraceLog # Output: # True if the html file was created, false if it failed -def createHTML(testruns): +def createHTML(testruns, testfail): if len(testruns) < 1: print('ERROR: Not enough test data to build a timeline') return @@ -3641,6 +3702,7 @@ def createHTML(testruns): '<td class="purple">{4}Firmware Resume: {2} ms</td>'\ '<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\ '</tr>\n</table>\n' + html_fail = '<table class="testfail"><tr><td>{0}</td></tr></table>\n' # html format variables scaleH = 20 @@ -3708,6 +3770,9 @@ def createHTML(testruns): resume_time, testdesc, stitle, rtitle) devtl.html += thtml + if testfail: + devtl.html += html_fail.format(testfail) + # time scale for potentially multiple datasets t0 = testruns[0].start tMax = testruns[-1].end @@ -4006,6 +4071,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''): .blue {background:rgba(169,208,245,0.4);}\n\ .time1 {font:22px Arial;border:1px solid;}\n\ .time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\ + .testfail {font:bold 22px Arial;color:red;border:1px dashed;}\n\ td {text-align:center;}\n\ r {color:#500000;font:15px Tahoma;}\n\ n {color:#505050;font:15px Tahoma;}\n\ @@ -4927,6 +4993,25 @@ def dmidecode(mempath, fatal=False): count += 1 return out +def getBattery(): + p = '/sys/class/power_supply' + bat = dict() + for d in os.listdir(p): + type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower() + if type != 'battery': + continue + for v in ['status', 'energy_now', 'capacity_now']: + bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower() + break + ac = True + if 'status' in bat and 'discharging' in bat['status']: + ac = False + charge = 0 + for v in ['energy_now', 'capacity_now']: + if v in bat and bat[v]: + charge = int(bat[v]) + return (ac, charge) + # Function: getFPDT # Description: # Read the acpi bios tables and pull out FPDT, the firmware data @@ -5202,8 +5287,9 @@ def getArgFloat(name, args, min, max, main=True): def processData(live=False): print('PROCESSING DATA') + error = '' if(sysvals.usetraceevents): - testruns = parseTraceLog(live) + testruns, error = parseTraceLog(live) if sysvals.dmesgfile: for data in testruns: data.extractErrorInfo() @@ -5220,15 +5306,18 @@ def processData(live=False): for data in testruns: data.debugPrint() sys.exit() - + if len(testruns) < 1: + return (testruns, {'error': 'timeline generation failed'}) sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile) - createHTML(testruns) + createHTML(testruns, error) print('DONE') data = testruns[0] stamp = data.stamp stamp['suspend'], stamp['resume'] = data.getTimeValues() if data.fwValid: stamp['fwsuspend'], stamp['fwresume'] = data.fwSuspend, data.fwResume + if error: + stamp['error'] = error return (testruns, stamp) # Function: rerunTest @@ -5268,58 +5357,88 @@ def runTest(n=0): sysvals.sudouser(sysvals.testdir) sysvals.outputResult(stamp, n) -def find_in_html(html, strs, div=False): - for str in strs: - l = len(str) - i = html.find(str) - if i >= 0: +def find_in_html(html, start, end, firstonly=True): + n, out = 0, [] + while n < len(html): + m = re.search(start, html[n:]) + if not m: break - if i < 0: - return '' - if not div: - return re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50]).group() - n = html[i+l:].find('</div>') - if n < 0: + i = m.end() + m = re.search(end, html[n+i:]) + if not m: + break + j = m.start() + str = html[n+i:n+i+j] + if end == 'ms': + num = re.search(r'[-+]?\d*\.\d+|\d+', str) + str = num.group() if num else 'NaN' + if firstonly: + return str + out.append(str) + n += i+j + if firstonly: return '' - return html[i+l:i+l+n] + return out # Function: runSummary # Description: # create a summary of tests in a sub-directory -def runSummary(subdir, local=True): +def runSummary(subdir, local=True, genhtml=False): inpath = os.path.abspath(subdir) outpath = inpath if local: outpath = os.path.abspath('.') print('Generating a summary of folder "%s"' % inpath) + if genhtml: + for dirname, dirnames, filenames in os.walk(subdir): + sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = '' + for filename in filenames: + if(re.match('.*_dmesg.txt', filename)): + sysvals.dmesgfile = os.path.join(dirname, filename) + elif(re.match('.*_ftrace.txt', filename)): + sysvals.ftracefile = os.path.join(dirname, filename) + sysvals.setOutputFile() + if sysvals.ftracefile and sysvals.htmlfile and \ + not os.path.exists(sysvals.htmlfile): + print('FTRACE: %s' % sysvals.ftracefile) + if sysvals.dmesgfile: + print('DMESG : %s' % sysvals.dmesgfile) + rerunTest() testruns = [] for dirname, dirnames, filenames in os.walk(subdir): for filename in filenames: if(not re.match('.*.html', filename)): continue file = os.path.join(dirname, filename) - html = open(file, 'r').read(10000) - suspend = find_in_html(html, - ['Kernel Suspend: ', 'Kernel Suspend Time: ']) - resume = find_in_html(html, - ['Kernel Resume: ', 'Kernel Resume Time: ']) - line = find_in_html(html, ['<div class="stamp">'], True) + html = open(file, 'r').read() + suspend = find_in_html(html, 'Kernel Suspend', 'ms') + resume = find_in_html(html, 'Kernel Resume', 'ms') + line = find_in_html(html, '<div class="stamp">', '</div>') stmp = line.split() - if not suspend or not resume or len(stmp) < 4: + if not suspend or not resume or len(stmp) != 8: continue + try: + dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p') + except: + continue + tstr = dt.strftime('%Y/%m/%d %H:%M:%S') + error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>') + result = 'fail' if error else 'pass' + ilist = [] + e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '→</div>', False) + for i in list(set(e)): + ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i) data = { + 'mode': stmp[2], 'host': stmp[0], 'kernel': stmp[1], - 'mode': stmp[2], - 'time': string.join(stmp[3:], ' '), + 'time': tstr, + 'result': result, + 'issues': ','.join(ilist), 'suspend': suspend, 'resume': resume, 'url': os.path.relpath(file, outpath), } - if len(stmp) == 7: - data['kernel'] = 'unknown' - data['mode'] = stmp[1] - data['time'] = string.join(stmp[2:], ' ') testruns.append(data) outfile = os.path.join(outpath, 'summary.html') print('Summary file: %s' % outfile) @@ -5609,11 +5728,12 @@ def printHelp(): print(' -modes List available suspend modes') print(' -status Test to see if the system is enabled to run this tool') print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table') + print(' -battery Print out battery info (if available)') print(' -sysinfo Print out system info extracted from BIOS') print(' -devinfo Print out the pm settings of all devices which support runtime suspend') print(' -flist Print the list of functions currently being captured in ftrace') print(' -flistall Print all functions capable of being captured in ftrace') - print(' -summary directory Create a summary of all test in this dir') + print(' -summary dir Create a summary of tests in this dir [-genhtml builds missing html]') print(' [redo]') print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)') print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)') @@ -5623,8 +5743,9 @@ def printHelp(): # ----------------- MAIN -------------------- # exec start (skipped if script is loaded as library) if __name__ == '__main__': + genhtml = False cmd = '' - simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status'] + simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery'] if '-f' in sys.argv: sysvals.cgskip = sysvals.configFile('cgskip.txt') # loop through the command line arguments @@ -5660,6 +5781,8 @@ if __name__ == '__main__': sysvals.skiphtml = True elif(arg == '-cgdump'): sysvals.cgdump = True + elif(arg == '-genhtml'): + genhtml = True elif(arg == '-addlogs'): sysvals.dmesglog = sysvals.ftracelog = True elif(arg == '-verbose'): @@ -5856,6 +5979,8 @@ if __name__ == '__main__': statusCheck(True) elif(cmd == 'fpdt'): getFPDT(True) + elif(cmd == 'battery'): + print 'AC Connect: %s\nCharge: %d' % getBattery() elif(cmd == 'sysinfo'): sysvals.printSystemInfo(True) elif(cmd == 'devinfo'): @@ -5867,7 +5992,7 @@ if __name__ == '__main__': elif(cmd == 'flistall'): sysvals.getFtraceFilterFunctions(False) elif(cmd == 'summary'): - runSummary(sysvals.outdir, True) + runSummary(sysvals.outdir, True, genhtml) sys.exit() # if instructed, re-analyze existing data files @@ -5920,7 +6045,7 @@ if __name__ == '__main__': print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count'])) sysvals.logmsg = '' if not sysvals.skiphtml: - runSummary(sysvals.outdir, False) + runSummary(sysvals.outdir, False, False) sysvals.sudouser(sysvals.outdir) else: if sysvals.outdir: diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index 29f50d4cfea0..84e2b648e622 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -28,6 +28,7 @@ import subprocess import os import time import re +import signal import sys import getopt import Gnuplot @@ -78,11 +79,12 @@ def print_help(): print(' Or') print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>') print(' To generate trace file, parse and plot, use (sudo required):') - print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name>') + print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>') print(' Or') - print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name>') + print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>') print(' Optional argument:') - print(' cpus: comma separated list of CPUs') + print(' cpus: comma separated list of CPUs') + print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240') print(' Output:') print(' If not already present, creates a "results/test_name" folder in the current working directory with:') print(' cpu.csv - comma seperated values file with trace contents and some additional calculations.') @@ -379,7 +381,7 @@ def clear_trace_file(): f_handle.close() except: print('IO error clearing trace file ') - quit() + sys.exit(2) def enable_trace(): """ Enable trace """ @@ -389,7 +391,7 @@ def enable_trace(): , 'w').write("1") except: print('IO error enabling trace ') - quit() + sys.exit(2) def disable_trace(): """ Disable trace """ @@ -399,17 +401,17 @@ def disable_trace(): , 'w').write("0") except: print('IO error disabling trace ') - quit() + sys.exit(2) def set_trace_buffer_size(): """ Set trace buffer size """ try: - open('/sys/kernel/debug/tracing/buffer_size_kb' - , 'w').write("10240") + with open('/sys/kernel/debug/tracing/buffer_size_kb', 'w') as fp: + fp.write(memory) except: - print('IO error setting trace buffer size ') - quit() + print('IO error setting trace buffer size ') + sys.exit(2) def free_trace_buffer(): """ Free the trace buffer memory """ @@ -418,8 +420,8 @@ def free_trace_buffer(): open('/sys/kernel/debug/tracing/buffer_size_kb' , 'w').write("1") except: - print('IO error setting trace buffer size ') - quit() + print('IO error freeing trace buffer ') + sys.exit(2) def read_trace_data(filename): """ Read and parse trace data """ @@ -431,7 +433,7 @@ def read_trace_data(filename): data = open(filename, 'r').read() except: print('Error opening ', filename) - quit() + sys.exit(2) for line in data.splitlines(): search_obj = \ @@ -489,10 +491,22 @@ def read_trace_data(filename): # Now seperate the main overall csv file into per CPU csv files. split_csv() +def signal_handler(signal, frame): + print(' SIGINT: Forcing cleanup before exit.') + if interval: + disable_trace() + clear_trace_file() + # Free the memory + free_trace_buffer() + sys.exit(0) + +signal.signal(signal.SIGINT, signal_handler) + interval = "" filename = "" cpu_list = "" testname = "" +memory = "10240" graph_data_present = False; valid1 = False @@ -501,7 +515,7 @@ valid2 = False cpu_mask = zeros((MAX_CPUS,), dtype=int) try: - opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:",["help","trace_file=","interval=","cpu=","name="]) + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="]) except getopt.GetoptError: print_help() sys.exit(2) @@ -521,6 +535,8 @@ for opt, arg in opts: elif opt in ("-n", "--name"): valid2 = True testname = arg + elif opt in ("-m", "--memory"): + memory = arg if not (valid1 and valid2): print_help() @@ -569,6 +585,11 @@ current_max_cpu = 0 read_trace_data(filename) +clear_trace_file() +# Free the memory +if interval: + free_trace_buffer() + if graph_data_present == False: print('No valid data to plot') sys.exit(2) @@ -593,9 +614,4 @@ for root, dirs, files in os.walk('.'): for f in files: fix_ownership(f) -clear_trace_file() -# Free the memory -if interval: - free_trace_buffer() - os.chdir('../../') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4ea385be528f..e2926f72a821 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -29,6 +29,8 @@ #include "nfit_test.h" #include "../watermark.h" +#include <asm/mcsafe_test.h> + /* * Generate an NFIT table to describe the following topology: * @@ -1989,8 +1991,7 @@ static void nfit_test0_setup(struct nfit_test *t) pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; pcap->header.length = sizeof(*pcap); pcap->highest_capability = 1; - pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | - ACPI_NFIT_CAPABILITY_MEM_FLUSH; + pcap->capabilities = ACPI_NFIT_CAPABILITY_MEM_FLUSH; offset += pcap->header.length; if (t->setup_hotplug) { @@ -2681,6 +2682,107 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; +static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + +enum INJECT { + INJECT_NONE, + INJECT_SRC, + INJECT_DST, +}; + +static void mcsafe_test_init(char *dst, char *src, size_t size) +{ + size_t i; + + memset(dst, 0xff, size); + for (i = 0; i < size; i++) + src[i] = (char) i; +} + +static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, + size_t size, unsigned long rem) +{ + size_t i; + + for (i = 0; i < size - rem; i++) + if (dst[i] != (unsigned char) i) { + pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n", + __func__, __LINE__, i, dst[i], + (unsigned char) i); + return false; + } + for (i = size - rem; i < size; i++) + if (dst[i] != 0xffU) { + pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n", + __func__, __LINE__, i, dst[i]); + return false; + } + return true; +} + +void mcsafe_test(void) +{ + char *inject_desc[] = { "none", "source", "destination" }; + enum INJECT inj; + + if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { + pr_info("%s: run...\n", __func__); + } else { + pr_info("%s: disabled, skip.\n", __func__); + return; + } + + for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) { + int i; + + pr_info("%s: inject: %s\n", __func__, inject_desc[inj]); + for (i = 0; i < 512; i++) { + unsigned long expect, rem; + void *src, *dst; + bool valid; + + switch (inj) { + case INJECT_NONE: + mcsafe_inject_src(NULL); + mcsafe_inject_dst(NULL); + dst = &mcsafe_buf[2048]; + src = &mcsafe_buf[1024 - i]; + expect = 0; + break; + case INJECT_SRC: + mcsafe_inject_src(&mcsafe_buf[1024]); + mcsafe_inject_dst(NULL); + dst = &mcsafe_buf[2048]; + src = &mcsafe_buf[1024 - i]; + expect = 512 - i; + break; + case INJECT_DST: + mcsafe_inject_src(NULL); + mcsafe_inject_dst(&mcsafe_buf[2048]); + dst = &mcsafe_buf[2048 - i]; + src = &mcsafe_buf[1024]; + expect = 512 - i; + break; + } + + mcsafe_test_init(dst, src, 512); + rem = __memcpy_mcsafe(dst, src, 512); + valid = mcsafe_test_validate(dst, src, 512, expect); + if (rem == expect && valid) + continue; + pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", + __func__, + ((unsigned long) dst) & ~PAGE_MASK, + ((unsigned long ) src) & ~PAGE_MASK, + 512, i, rem, valid ? "valid" : "bad", + expect); + } + } + + mcsafe_inject_src(NULL); + mcsafe_inject_dst(NULL); +} + static __init int nfit_test_init(void) { int rc, i; @@ -2689,6 +2791,7 @@ static __init int nfit_test_init(void) libnvdimm_test(); acpi_nfit_test(); device_dax_test(); + mcsafe_test(); nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 32aafa92074c..f1fe492c8e17 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -3,6 +3,7 @@ TARGETS = android TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities +TARGETS += cgroup TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += efivarfs @@ -28,9 +29,12 @@ TARGETS += powerpc TARGETS += proc TARGETS += pstore TARGETS += ptrace +TARGETS += rseq +TARGETS += rtc TARGETS += seccomp TARGETS += sigaltstack TARGETS += size +TARGETS += sparc64 TARGETS += splice TARGETS += static_keys TARGETS += sync @@ -134,7 +138,8 @@ ifdef INSTALL_PATH echo "else" >> $(ALL_SCRIPT) echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT) echo "fi" >> $(ALL_SCRIPT) - echo "export KSFT_TAP_LEVEL=`echo 1`" >> $(ALL_SCRIPT) + echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT) + echo "export skip=4" >> $(ALL_SCRIPT) for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile index f6304d2be90c..72c25a3cb658 100644 --- a/tools/testing/selftests/android/Makefile +++ b/tools/testing/selftests/android/Makefile @@ -18,10 +18,6 @@ all: fi \ done -override define RUN_TESTS - @cd $(OUTPUT); ./run.sh -endef - override define INSTALL_RULE mkdir -p $(INSTALL_PATH) install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) @@ -33,10 +29,6 @@ override define INSTALL_RULE done; endef -override define EMIT_TESTS - echo "./run.sh" -endef - override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh index a1aff506f5e6..69e676cfc94e 100755 --- a/tools/testing/selftests/android/ion/ion_test.sh +++ b/tools/testing/selftests/android/ion/ion_test.sh @@ -4,6 +4,9 @@ heapsize=4096 TCID="ion_test.sh" errcode=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + run_test() { heaptype=$1 @@ -25,7 +28,7 @@ check_root() uid=$(id -u) if [ $uid -ne 0 ]; then echo $TCID: must be run as root >&2 - exit 0 + exit $ksft_skip fi } @@ -35,7 +38,7 @@ check_device() if [ ! -e $DEVICE ]; then echo $TCID: No $DEVICE device found >&2 echo $TCID: May be CONFIG_ION is not set >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 5e1ab2f0eb79..49938d72cf63 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -15,3 +15,7 @@ test_libbpf_open test_sock test_sock_addr urandom_read +test_btf +test_sockmap +test_lirc_mode2_user +get_cgroup_id_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0a315ddabbf4..7a6214e9ae58 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -10,28 +10,30 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf -lrt -lpthread TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read all: $(TEST_CUSTOM_PROGS) -$(TEST_CUSTOM_PROGS): urandom_read - -urandom_read: urandom_read.c - $(CC) -o $(TEST_CUSTOM_PROGS) -static $< +$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c + $(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ - test_sock test_sock_addr + test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ - sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o + sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ + test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ + test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ + test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ + get_cgroup_id_kern.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -39,10 +41,13 @@ TEST_PROGS := test_kmod.sh \ test_xdp_redirect.sh \ test_xdp_meta.sh \ test_offload.py \ - test_sock_addr.sh + test_sock_addr.sh \ + test_tunnel.sh \ + test_lwt_seg6local.sh \ + test_lirc_mode2.sh # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open +TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr include ../lib.mk @@ -55,6 +60,9 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c +$(OUTPUT)/test_sockmap: cgroup_helpers.c +$(OUTPUT)/test_progs: trace_helpers.c +$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c .PHONY: force @@ -66,6 +74,8 @@ $(BPFOBJ): force CLANG ?= clang LLC ?= llc +LLVM_OBJCOPY ?= llvm-objcopy +BTF_PAHOLE ?= pahole PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) @@ -77,15 +87,42 @@ else CPU ?= generic endif +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ + $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline +BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) +BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) +BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version 2>&1 | grep LLVM) + +ifneq ($(BTF_LLC_PROBE),) +ifneq ($(BTF_PAHOLE_PROBE),) +ifneq ($(BTF_OBJCOPY_PROBE),) + CLANG_FLAGS += -g + LLC_FLAGS += -mattr=dwarfris + DWARF2BTF = y +endif +endif +endif + $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ - $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d8223d99f96d..f2f28b6c8915 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -75,9 +75,14 @@ static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = (void *) BPF_FUNC_sock_ops_cb_flags_set; static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; +static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) = + (void *) BPF_FUNC_sk_redirect_hash; static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = (void *) BPF_FUNC_sock_map_update; +static int (*bpf_sock_hash_update)(void *map, void *key, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_sock_hash_update; static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_event_read_value; @@ -88,6 +93,9 @@ static int (*bpf_override_return)(void *ctx, unsigned long rc) = (void *) BPF_FUNC_override_return; static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_msg_redirect_map; +static int (*bpf_msg_redirect_hash)(void *ctx, + void *map, void *key, int flags) = + (void *) BPF_FUNC_msg_redirect_hash; static int (*bpf_msg_apply_bytes)(void *ctx, int len) = (void *) BPF_FUNC_msg_apply_bytes; static int (*bpf_msg_cork_bytes)(void *ctx, int len) = @@ -96,6 +104,35 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; +static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_tail; +static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, + int size, int flags) = + (void *) BPF_FUNC_skb_get_xfrm_state; +static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = + (void *) BPF_FUNC_get_stack; +static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, + int plen, __u32 flags) = + (void *) BPF_FUNC_fib_lookup; +static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr, + unsigned int len) = + (void *) BPF_FUNC_lwt_push_encap; +static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset, + void *from, unsigned int len) = + (void *) BPF_FUNC_lwt_seg6_store_bytes; +static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param, + unsigned int param_len) = + (void *) BPF_FUNC_lwt_seg6_action; +static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset, + unsigned int len) = + (void *) BPF_FUNC_lwt_seg6_adjust_srh; +static int (*bpf_rc_repeat)(void *ctx) = + (void *) BPF_FUNC_rc_repeat; +static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol, + unsigned long long scancode, unsigned int toggle) = + (void *) BPF_FUNC_rc_keydown; +static unsigned long long (*bpf_get_current_cgroup_id)(void) = + (void *) BPF_FUNC_get_current_cgroup_id; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -129,6 +166,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = + (void *) BPF_FUNC_csum_diff; static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = (void *) BPF_FUNC_skb_under_cgroup; static int (*bpf_skb_change_head)(void *, int len, int flags) = diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h new file mode 100644 index 000000000000..59bf3e1a9371 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_rand.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_RAND__ +#define __BPF_RAND__ + +#include <stdint.h> +#include <stdlib.h> +#include <time.h> + +static inline uint64_t bpf_rand_mask(uint64_t mask) +{ + return (((uint64_t)(uint32_t)rand()) | + ((uint64_t)(uint32_t)rand() << 32)) & mask; +} + +#define bpf_rand_ux(x, m) \ +static inline uint64_t bpf_rand_u##x(int shift) \ +{ \ + return bpf_rand_mask((m)) << shift; \ +} + +bpf_rand_ux( 8, 0xffULL) +bpf_rand_ux(16, 0xffffULL) +bpf_rand_ux(24, 0xffffffULL) +bpf_rand_ux(32, 0xffffffffULL) +bpf_rand_ux(40, 0xffffffffffULL) +bpf_rand_ux(48, 0xffffffffffffULL) +bpf_rand_ux(56, 0xffffffffffffffULL) +bpf_rand_ux(64, 0xffffffffffffffffULL) + +static inline void bpf_semi_rand_init(void) +{ + srand(time(NULL)); +} + +static inline uint64_t bpf_semi_rand_get(void) +{ + switch (rand() % 39) { + case 0: return 0x000000ff00000000ULL | bpf_rand_u8(0); + case 1: return 0xffffffff00000000ULL | bpf_rand_u16(0); + case 2: return 0x00000000ffff0000ULL | bpf_rand_u16(0); + case 3: return 0x8000000000000000ULL | bpf_rand_u32(0); + case 4: return 0x00000000f0000000ULL | bpf_rand_u32(0); + case 5: return 0x0000000100000000ULL | bpf_rand_u24(0); + case 6: return 0x800ff00000000000ULL | bpf_rand_u32(0); + case 7: return 0x7fffffff00000000ULL | bpf_rand_u32(0); + case 8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24); + case 9: return 0xffffffffffffff00ULL | bpf_rand_u8(0); + case 10: return 0x0000000010000000ULL | bpf_rand_u32(0); + case 11: return 0xf000000000000000ULL | bpf_rand_u8(0); + case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8); + case 13: return 0x000000000f000000ULL | bpf_rand_u8(16); + case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32); + case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48); + case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1); + case 17: return 0xffff800000000000ULL | bpf_rand_u8(4); + case 18: return 0xffff800000000000ULL | bpf_rand_u8(20); + case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0); + case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0); + case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20); + case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40); + case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64); + case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64); + case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64); + case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64); + case 27: return 0x0000800000000000ULL; + case 28: return 0x8000000000000000ULL; + case 29: return 0x0000000000000000ULL; + case 30: return 0xffffffffffffffffULL; + case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64); + case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64); + case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64); + case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64); + case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64); + case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64); + case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64); + default: return bpf_rand_u64(0); + } +} + +#endif /* __BPF_RAND__ */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index f3bca3ade0f3..c87b4e052ce9 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -6,6 +6,7 @@ #include <sys/types.h> #include <linux/limits.h> #include <stdio.h> +#include <stdlib.h> #include <linux/sched.h> #include <fcntl.h> #include <unistd.h> @@ -176,3 +177,59 @@ int create_and_get_cgroup(char *path) return fd; } + +/** + * get_cgroup_id() - Get cgroup id for a particular cgroup path + * @path: The cgroup path, relative to the workdir, to join + * + * On success, it returns the cgroup id. On failure it returns 0, + * which is an invalid cgroup id. + * If there is a failure, it prints the error to stderr. + */ +unsigned long long get_cgroup_id(char *path) +{ + int dirfd, err, flags, mount_id, fhsize; + union { + unsigned long long cgid; + unsigned char raw_bytes[8]; + } id; + char cgroup_workdir[PATH_MAX + 1]; + struct file_handle *fhp, *fhp2; + unsigned long long ret = 0; + + format_cgroup_path(cgroup_workdir, path); + + dirfd = AT_FDCWD; + flags = 0; + fhsize = sizeof(*fhp); + fhp = calloc(1, fhsize); + if (!fhp) { + log_err("calloc"); + return 0; + } + err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); + if (err >= 0 || fhp->handle_bytes != 8) { + log_err("name_to_handle_at"); + goto free_mem; + } + + fhsize = sizeof(struct file_handle) + fhp->handle_bytes; + fhp2 = realloc(fhp, fhsize); + if (!fhp2) { + log_err("realloc"); + goto free_mem; + } + err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); + fhp = fhp2; + if (err < 0) { + log_err("name_to_handle_at"); + goto free_mem; + } + + memcpy(id.raw_bytes, fhp->f_handle, 8); + ret = id.cgid; + +free_mem: + free(fhp); + return ret; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 06485e0002b3..20a4a5dcd469 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -13,5 +13,6 @@ int create_and_get_cgroup(char *path); int join_cgroup(char *path); int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); +unsigned long long get_cgroup_id(char *path); #endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 1eefe211a4a8..b4994a94968b 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -6,4 +6,15 @@ CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCHED=y CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_IPIP=y +CONFIG_IPV6=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPGRE=y +CONFIG_IPV6_GRE=y +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_HMAC=m +CONFIG_CRYPTO_SHA256=m +CONFIG_VXLAN=y +CONFIG_GENEVE=y diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/get_cgroup_id_kern.c new file mode 100644 index 000000000000..014dba10b8a5 --- /dev/null +++ b/tools/testing/selftests/bpf/get_cgroup_id_kern.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") cg_ids = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") pidmap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int trace(void *ctx) +{ + __u32 pid = bpf_get_current_pid_tgid(); + __u32 key = 0, *expected_pid; + __u64 *val; + + expected_pid = bpf_map_lookup_elem(&pidmap, &key); + if (!expected_pid || *expected_pid != pid) + return 0; + + val = bpf_map_lookup_elem(&cg_ids, &key); + if (val) + *val = bpf_get_current_cgroup_id(); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c new file mode 100644 index 000000000000..e8da7b39158d --- /dev/null +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <syscall.h> +#include <unistd.h> +#include <linux/perf_event.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "cgroup_helpers.h" +#include "bpf_rlimit.h" + +#define CHECK(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s:FAIL:%s ", __func__, tag); \ + printf(format); \ + } else { \ + printf("%s:PASS:%s\n", __func__, tag); \ + } \ + __ret; \ +}) + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) + return -1; + return bpf_map__fd(map); +} + +#define TEST_CGROUP "/test-bpf-get-cgroup-id/" + +int main(int argc, char **argv) +{ + const char *probe_name = "syscalls/sys_enter_nanosleep"; + const char *file = "get_cgroup_id_kern.o"; + int err, bytes, efd, prog_fd, pmu_fd; + int cgroup_fd, cgidmap_fd, pidmap_fd; + struct perf_event_attr attr = {}; + struct bpf_object *obj; + __u64 kcgid = 0, ucgid; + __u32 key = 0, pid; + int exit_code = 1; + char buf[256]; + + err = setup_cgroup_environment(); + if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err, + errno)) + return 1; + + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n", + cgroup_fd, errno)) + goto cleanup_cgroup_env; + + err = join_cgroup(TEST_CGROUP); + if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno)) + goto cleanup_cgroup_env; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) + goto cleanup_cgroup_env; + + cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids"); + if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n", + cgidmap_fd, errno)) + goto close_prog; + + pidmap_fd = bpf_find_map(__func__, obj, "pidmap"); + if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n", + pidmap_fd, errno)) + goto close_prog; + + pid = getpid(); + bpf_map_update_elem(pidmap_fd, &key, &pid, 0); + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read", + "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + + /* attach to this pid so the all bpf invocations will be in the + * cgroup associated with this pid. + */ + pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, + errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + /* trigger some syscalls */ + sleep(1); + + err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid); + if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno)) + goto close_pmu; + + ucgid = get_cgroup_id(TEST_CGROUP); + if (CHECK(kcgid != ucgid, "compare_cgroup_id", + "kern cgid %llx user cgid %llx", kcgid, ucgid)) + goto close_pmu; + + exit_code = 0; + printf("%s:PASS\n", argv[0]); + +close_pmu: + close(pmu_fd); +close_prog: + bpf_object__close(obj); +cleanup_cgroup_env: + cleanup_cgroup_environment(); + return exit_code; +} diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/sendmsg4_prog.c new file mode 100644 index 000000000000..a91536b1c47e --- /dev/null +++ b/tools/testing/selftests/bpf/sendmsg4_prog.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */ +#define SRC2_IP4 0x00000000U +#define SRC_REWRITE_IP4 0x7f000004U +#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */ +#define DST_REWRITE_IP4 0x7f000001U +#define DST_PORT 4040 +#define DST_REWRITE_PORT4 4444 + +int _version SEC("version") = 1; + +SEC("cgroup/sendmsg4") +int sendmsg_v4_prog(struct bpf_sock_addr *ctx) +{ + if (ctx->type != SOCK_DGRAM) + return 0; + + /* Rewrite source. */ + if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) || + ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) { + ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4); + } else { + /* Unexpected source. Reject sendmsg. */ + return 0; + } + + /* Rewrite destination. */ + if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) && + ctx->user_port == bpf_htons(DST_PORT)) { + ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); + ctx->user_port = bpf_htons(DST_REWRITE_PORT4); + } else { + /* Unexpected source. Reject sendmsg. */ + return 0; + } + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c new file mode 100644 index 000000000000..5aeaa284fc47 --- /dev/null +++ b/tools/testing/selftests/bpf/sendmsg6_prog.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC_REWRITE_IP6_0 0 +#define SRC_REWRITE_IP6_1 0 +#define SRC_REWRITE_IP6_2 0 +#define SRC_REWRITE_IP6_3 6 + +#define DST_REWRITE_IP6_0 0 +#define DST_REWRITE_IP6_1 0 +#define DST_REWRITE_IP6_2 0 +#define DST_REWRITE_IP6_3 1 + +#define DST_REWRITE_PORT6 6666 + +int _version SEC("version") = 1; + +SEC("cgroup/sendmsg6") +int sendmsg_v6_prog(struct bpf_sock_addr *ctx) +{ + if (ctx->type != SOCK_DGRAM) + return 0; + + /* Rewrite source. */ + if (ctx->msg_src_ip6[3] == bpf_htonl(1) || + ctx->msg_src_ip6[3] == bpf_htonl(0)) { + ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0); + ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1); + ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2); + ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3); + } else { + /* Unexpected source. Reject sendmsg. */ + return 0; + } + + /* Rewrite destination. */ + if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) && + ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) { + ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); + ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1); + ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2); + ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3); + + ctx->user_port = bpf_htons(DST_REWRITE_PORT6); + } else { + /* Unexpected destination. Reject sendmsg. */ + return 0; + } + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c new file mode 100644 index 000000000000..4cd5e860c903 --- /dev/null +++ b/tools/testing/selftests/bpf/test_adjust_tail.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +SEC("xdp_adjust_tail") +int _xdp_adjust_tail(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + int offset = 0; + + if (data_end - data == 54) + offset = 256; + else + offset = 20; + if (bpf_xdp_adjust_tail(xdp, 0 - offset)) + return XDP_DROP; + return XDP_TX; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c new file mode 100644 index 000000000000..3619f3023088 --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf.c @@ -0,0 +1,2315 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/err.h> +#include <bpf/bpf.h> +#include <sys/resource.h> +#include <libelf.h> +#include <gelf.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> + +#include "bpf_rlimit.h" + +static uint32_t pass_cnt; +static uint32_t error_cnt; +static uint32_t skip_cnt; + +#define CHECK(condition, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ + fprintf(stderr, format); \ + } \ + __ret; \ +}) + +static int count_result(int err) +{ + if (err) + error_cnt++; + else + pass_cnt++; + + fprintf(stderr, "\n"); + return err; +} + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define __printf(a, b) __attribute__((format(printf, a, b))) + +__printf(1, 2) +static int __base_pr(const char *format, ...) +{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +#define BTF_INFO_ENC(kind, root, vlen) \ + ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) + +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) + +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) + +#define BTF_ARRAY_ENC(type, index_type, nr_elems) \ + (type), (index_type), (nr_elems) +#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \ + BTF_ARRAY_ENC(type, index_type, nr_elems) + +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) +#define BTF_ENUM_ENC(name, val) (name), (val) + +#define BTF_TYPEDEF_ENC(name, type) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) + +#define BTF_PTR_ENC(name, type) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) + +#define BTF_END_RAW 0xdeadbeef +#define NAME_TBD 0xdeadb33f + +#define MAX_NR_RAW_TYPES 1024 +#define BTF_LOG_BUF_SIZE 65535 + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +static struct args { + unsigned int raw_test_num; + unsigned int file_test_num; + unsigned int get_info_test_num; + bool raw_test; + bool file_test; + bool get_info_test; + bool pprint_test; + bool always_log; +} args; + +static char btf_log_buf[BTF_LOG_BUF_SIZE]; + +static struct btf_header hdr_tmpl = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), +}; + +struct btf_raw_test { + const char *descr; + const char *str_sec; + const char *map_name; + const char *err_str; + __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 str_sec_size; + enum bpf_map_type map_type; + __u32 key_size; + __u32 value_size; + __u32 key_type_id; + __u32 value_type_id; + __u32 max_entries; + bool btf_load_err; + bool map_create_err; + int hdr_len_delta; + int type_off_delta; + int str_off_delta; + int str_len_delta; +}; + +static struct btf_raw_test raw_tests[] = { +/* enum E { + * E0, + * E1, + * }; + * + * struct A { + * unsigned long long m; + * int n; + * char o; + * [3 bytes hole] + * int p[8]; + * int q[4][8]; + * enum E r; + * }; + */ +{ + .descr = "struct test #1", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */ + BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */ + /* } */ + /* int[4][8] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */ + /* enum E */ /* [7] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test1_map", + .key_size = sizeof(int), + .value_size = 180, + .key_type_id = 1, + .value_type_id = 5, + .max_entries = 4, +}, + +/* typedef struct b Struct_B; + * + * struct A { + * int m; + * struct b n[4]; + * const Struct_B o[4]; + * }; + * + * struct B { + * int m; + * int n; + * }; + */ +{ + .descr = "struct test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct b [4] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), + + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */ + BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/ + /* } */ + + /* struct B { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ + /* } */ + + /* const int */ /* [5] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), + /* typedef struct b Struct_B */ /* [6] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4), + /* const Struct_B */ /* [7] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6), + /* const Struct_B [4] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(7, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B", + .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test2_map", + .key_size = sizeof(int), + .value_size = 68, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, +}, + +/* Test member exceeds the size of struct. + * + * struct A { + * int m; + * int n; + * }; + */ +{ + .descr = "size check test #1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n", + .str_sec_size = sizeof("\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check1_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* Test member exeeds the size of struct + * + * struct A { + * int m; + * int n[2]; + * }; + */ +{ + .descr = "size check test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* int[2] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 1, 2), + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n", + .str_sec_size = sizeof("\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check2_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* Test member exeeds the size of struct + * + * struct A { + * int m; + * void *n; + * }; + */ +{ + .descr = "size check test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* void* */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n", + .str_sec_size = sizeof("\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check3_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* Test member exceeds the size of struct + * + * enum E { + * E0, + * E1, + * }; + * + * struct A { + * int m; + * enum E n; + * }; + */ +{ + .descr = "size check test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* enum E { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + /* } */ + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0E\0E0\0E1\0A\0m\0n", + .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check4_map", + .key_size = sizeof(int), + .value_size = 1, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +/* typedef const void * const_void_ptr; + * struct A { + * const_void_ptr m; + * }; + */ +{ + .descr = "void test #1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void* */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), + /* typedef const void * const_void_ptr */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + /* struct A { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const_void_ptr m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 0), + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0const_void_ptr\0A\0m", + .str_sec_size = sizeof("\0const_void_ptr\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test1_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 4, +}, + +/* struct A { + * const void m; + * }; + */ +{ + .descr = "void test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8), + /* const void m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test2_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member", +}, + +/* typedef const void * const_void_ptr; + * const_void_ptr[4] + */ +{ + .descr = "void test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void* */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), + /* typedef const void * const_void_ptr */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + /* const_void_ptr[4] */ /* [5] */ + BTF_TYPE_ARRAY_ENC(3, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0const_void_ptr", + .str_sec_size = sizeof("\0const_void_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test3_map", + .key_size = sizeof(int), + .value_size = sizeof(void *) * 4, + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 4, +}, + +/* const void[4] */ +{ + .descr = "void test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void[4] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test4_map", + .key_size = sizeof(int), + .value_size = sizeof(void *) * 4, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid elem", +}, + +/* Array_A <------------------+ + * elem_type == Array_B | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array A --+ + */ +{ + .descr = "loop test #1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* Array_B */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test1_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* typedef is _before_ the BTF type of Array_A and Array_B + * + * typedef Array_B int_array; + * + * Array_A <------------------+ + * elem_type == int_array | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #2", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* typedef Array_B int_array */ + BTF_TYPEDEF_ENC(1, 4), /* [2] */ + /* Array_A */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */ + /* Array_B */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0int_array\0", + .str_sec_size = sizeof("\0int_array"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test2_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* Array_A <------------------+ + * elem_type == Array_B | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* Array_B */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test3_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* typedef is _between_ the BTF type of Array_A and Array_B + * + * typedef Array_B int_array; + * + * Array_A <------------------+ + * elem_type == int_array | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* typedef Array_B int_array */ /* [3] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* Array_B */ /* [4] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "\0int_array\0", + .str_sec_size = sizeof("\0int_array"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test4_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* typedef struct B Struct_B + * + * struct A { + * int x; + * Struct_B y; + * }; + * + * struct B { + * int x; + * struct A y; + * }; + */ +{ + .descr = "loop test #5", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct A */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */ + /* typedef struct B Struct_B */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + /* struct B */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */ + BTF_END_RAW, + }, + .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y", + .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test5_map", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +/* struct A { + * int x; + * struct A array_a[4]; + * }; + */ +{ + .descr = "loop test #6", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */ + /* struct A */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */ + BTF_END_RAW, + }, + .str_sec = "\0A\0x\0y", + .str_sec_size = sizeof("\0A\0x\0y"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test6_map", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +{ + .descr = "loop test #7", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 0), + /* CONST type_id=3 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* PTR type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test7_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +{ + .descr = "loop test #8", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *m; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 0), + /* struct B { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *n; */ + BTF_MEMBER_ENC(NAME_TBD, 6, 0), + /* CONST type_id=5 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5), + /* PTR type_id=6 */ /* [5] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6), + /* CONST type_id=7 */ /* [6] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7), + /* PTR type_id=4 */ /* [7] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0B\0n", + .str_sec_size = sizeof("\0A\0m\0B\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test8_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Loop detected", +}, + +{ + .descr = "string section does not end with null", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int") - 1, + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid string section", +}, + +{ + .descr = "empty string section", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = 0, + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid string section", +}, + +{ + .descr = "empty type section", + .raw_types = { + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "No type found", +}, + +{ + .descr = "btf_header test. Longer hdr_len", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .hdr_len_delta = 4, + .err_str = "Unsupported btf_header", +}, + +{ + .descr = "btf_header test. Gap between hdr and type", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .type_off_delta = 4, + .err_str = "Unsupported section found", +}, + +{ + .descr = "btf_header test. Gap between type and str", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_off_delta = 4, + .err_str = "Unsupported section found", +}, + +{ + .descr = "btf_header test. Overlap between type and str", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_off_delta = -4, + .err_str = "Section overlap found", +}, + +{ + .descr = "btf_header test. Larger BTF size", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = -4, + .err_str = "Unsupported section found", +}, + +{ + .descr = "btf_header test. Smaller BTF size", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = 4, + .err_str = "Total section length too long", +}, + +{ + .descr = "array test. index_type/elem_type \"int\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type/elem_type \"const int\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 3, 16), + /* CONST type_id=1 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type \"const int:31\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int:31 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), + /* int[16] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(1, 4, 16), + /* CONST type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. elem_type \"const int:31\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int:31 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), + /* int[16] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(4, 1, 16), + /* CONST type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid array of int", +}, + +{ + .descr = "array test. index_type \"void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 0, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. index_type \"const void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 3, 16), + /* CONST type_id=0 (void) */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. elem_type \"const void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 16), + /* CONST type_id=0 (void) */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid elem", +}, + +{ + .descr = "array test. elem_type \"const void *\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void *[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 16), + /* CONST type_id=4 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* void* */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type \"const void *\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void *[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 3, 16), + /* CONST type_id=4 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* void* */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. t->size != 0\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1), + BTF_ARRAY_ENC(1, 1, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "size != 0", +}, + +{ + .descr = "int test. invalid int_data", + .raw_types = { + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4), + 0x10000000, + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid int_data", +}, + +{ + .descr = "invalid BTF_INFO", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_ENC(0, 0x10000000, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info", +}, + +{ + .descr = "fwd test. t->type != 0\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* fwd type */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "type != 0", +}, + +}; /* struct btf_raw_test raw_tests[] */ + +static const char *get_next_str(const char *start, const char *end) +{ + return start < end - 1 ? start + 1 : NULL; +} + +static int get_type_sec_size(const __u32 *raw_types) +{ + int i; + + for (i = MAX_NR_RAW_TYPES - 1; + i >= 0 && raw_types[i] != BTF_END_RAW; + i--) + ; + + return i < 0 ? i : i * sizeof(raw_types[0]); +} + +static void *btf_raw_create(const struct btf_header *hdr, + const __u32 *raw_types, + const char *str, + unsigned int str_sec_size, + unsigned int *btf_size) +{ + const char *next_str = str, *end_str = str + str_sec_size; + unsigned int size_needed, offset; + struct btf_header *ret_hdr; + int i, type_sec_size; + uint32_t *ret_types; + void *raw_btf; + + type_sec_size = get_type_sec_size(raw_types); + if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) + return NULL; + + size_needed = sizeof(*hdr) + type_sec_size + str_sec_size; + raw_btf = malloc(size_needed); + if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf")) + return NULL; + + /* Copy header */ + memcpy(raw_btf, hdr, sizeof(*hdr)); + offset = sizeof(*hdr); + + /* Copy type section */ + ret_types = raw_btf + offset; + for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) { + if (raw_types[i] == NAME_TBD) { + next_str = get_next_str(next_str, end_str); + if (CHECK(!next_str, "Error in getting next_str")) { + free(raw_btf); + return NULL; + } + ret_types[i] = next_str - str; + next_str += strlen(next_str); + } else { + ret_types[i] = raw_types[i]; + } + } + offset += type_sec_size; + + /* Copy string section */ + memcpy(raw_btf + offset, str, str_sec_size); + + ret_hdr = (struct btf_header *)raw_btf; + ret_hdr->type_len = type_sec_size; + ret_hdr->str_off = type_sec_size; + ret_hdr->str_len = str_sec_size; + + *btf_size = size_needed; + + return raw_btf; +} + +static int do_test_raw(unsigned int test_num) +{ + struct btf_raw_test *test = &raw_tests[test_num - 1]; + struct bpf_create_map_attr create_attr = {}; + int map_fd = -1, btf_fd = -1; + unsigned int raw_btf_size; + struct btf_header *hdr; + void *raw_btf; + int err; + + fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + hdr = raw_btf; + + hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta; + hdr->type_off = (int)hdr->type_off + test->type_off_delta; + hdr->str_off = (int)hdr->str_off + test->str_off_delta; + hdr->str_len = (int)hdr->str_len + test->str_len_delta; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + err = ((btf_fd == -1) != test->btf_load_err); + if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", + btf_fd, test->btf_load_err) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { + err = -1; + goto done; + } + + if (err || btf_fd == -1) + goto done; + + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; + + map_fd = bpf_create_map_xattr(&create_attr); + + err = ((map_fd == -1) != test->map_create_err); + CHECK(err, "map_fd:%d test->map_create_err:%u", + map_fd, test->map_create_err); + +done: + if (!err) + fprintf(stderr, "OK"); + + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + if (btf_fd != -1) + close(btf_fd); + if (map_fd != -1) + close(map_fd); + + return err; +} + +static int test_raw(void) +{ + unsigned int i; + int err = 0; + + if (args.raw_test_num) + return count_result(do_test_raw(args.raw_test_num)); + + for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) + err |= count_result(do_test_raw(i)); + + return err; +} + +struct btf_get_info_test { + const char *descr; + const char *str_sec; + __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 str_sec_size; + int btf_size_delta; + int (*special_test)(unsigned int test_num); +}; + +static int test_big_btf_info(unsigned int test_num); +static int test_btf_id(unsigned int test_num); + +const struct btf_get_info_test get_info_tests[] = { +{ + .descr = "== raw_btf_size+1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .btf_size_delta = 1, +}, +{ + .descr = "== raw_btf_size-3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .btf_size_delta = -3, +}, +{ + .descr = "Large bpf_btf_info", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .special_test = test_big_btf_info, +}, +{ + .descr = "BTF ID", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned int */ /* [2] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .special_test = test_btf_id, +}, +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int test_big_btf_info(unsigned int test_num) +{ + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; + uint8_t *raw_btf = NULL, *user_btf = NULL; + unsigned int raw_btf_size; + struct { + struct bpf_btf_info info; + uint64_t garbage; + } info_garbage; + struct bpf_btf_info *info; + int btf_fd = -1, err; + uint32_t info_len; + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + + user_btf = malloc(raw_btf_size); + if (CHECK(!user_btf, "!user_btf")) { + err = -1; + goto done; + } + + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + if (CHECK(btf_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + /* + * GET_INFO should error out if the userspace info + * has non zero tailing bytes. + */ + info = &info_garbage.info; + memset(info, 0, sizeof(*info)); + info_garbage.garbage = 0xdeadbeef; + info_len = sizeof(info_garbage); + info->btf = ptr_to_u64(user_btf); + info->btf_size = raw_btf_size; + + err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); + if (CHECK(!err, "!err")) { + err = -1; + goto done; + } + + /* + * GET_INFO should succeed even info_len is larger than + * the kernel supported as long as tailing bytes are zero. + * The kernel supported info len should also be returned + * to userspace. + */ + info_garbage.garbage = 0; + err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); + if (CHECK(err || info_len != sizeof(*info), + "err:%d errno:%d info_len:%u sizeof(*info):%lu", + err, errno, info_len, sizeof(*info))) { + err = -1; + goto done; + } + + fprintf(stderr, "OK"); + +done: + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + free(raw_btf); + free(user_btf); + + if (btf_fd != -1) + close(btf_fd); + + return err; +} + +static int test_btf_id(unsigned int test_num) +{ + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; + struct bpf_create_map_attr create_attr = {}; + uint8_t *raw_btf = NULL, *user_btf[2] = {}; + int btf_fd[2] = {-1, -1}, map_fd = -1; + struct bpf_map_info map_info = {}; + struct bpf_btf_info info[2] = {}; + unsigned int raw_btf_size; + uint32_t info_len; + int err, i, ret; + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + + for (i = 0; i < 2; i++) { + user_btf[i] = malloc(raw_btf_size); + if (CHECK(!user_btf[i], "!user_btf[%d]", i)) { + err = -1; + goto done; + } + info[i].btf = ptr_to_u64(user_btf[i]); + info[i].btf_size = raw_btf_size; + } + + btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + /* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */ + info_len = sizeof(info[0]); + err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len); + if (CHECK(err, "errno:%d", errno)) { + err = -1; + goto done; + } + + btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id); + if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + ret = 0; + err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len); + if (CHECK(err || info[0].id != info[1].id || + info[0].btf_size != info[1].btf_size || + (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)), + "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d", + err, errno, info[0].id, info[1].id, + info[0].btf_size, info[1].btf_size, ret)) { + err = -1; + goto done; + } + + /* Test btf members in struct bpf_map_info */ + create_attr.name = "test_btf_id"; + create_attr.map_type = BPF_MAP_TYPE_ARRAY; + create_attr.key_size = sizeof(int); + create_attr.value_size = sizeof(unsigned int); + create_attr.max_entries = 4; + create_attr.btf_fd = btf_fd[0]; + create_attr.btf_key_type_id = 1; + create_attr.btf_value_type_id = 2; + + map_fd = bpf_create_map_xattr(&create_attr); + if (CHECK(map_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + info_len = sizeof(map_info); + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + if (CHECK(err || map_info.btf_id != info[0].id || + map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2, + "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u", + err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id, + map_info.btf_value_type_id)) { + err = -1; + goto done; + } + + for (i = 0; i < 2; i++) { + close(btf_fd[i]); + btf_fd[i] = -1; + } + + /* Test BTF ID is removed from the kernel */ + btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); + if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + close(btf_fd[0]); + btf_fd[0] = -1; + + /* The map holds the last ref to BTF and its btf_id */ + close(map_fd); + map_fd = -1; + btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); + if (CHECK(btf_fd[0] != -1, "BTF lingers")) { + err = -1; + goto done; + } + + fprintf(stderr, "OK"); + +done: + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + free(raw_btf); + if (map_fd != -1) + close(map_fd); + for (i = 0; i < 2; i++) { + free(user_btf[i]); + if (btf_fd[i] != -1) + close(btf_fd[i]); + } + + return err; +} + +static int do_test_get_info(unsigned int test_num) +{ + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; + unsigned int raw_btf_size, user_btf_size, expected_nbytes; + uint8_t *raw_btf = NULL, *user_btf = NULL; + struct bpf_btf_info info = {}; + int btf_fd = -1, err, ret; + uint32_t info_len; + + fprintf(stderr, "BTF GET_INFO test[%u] (%s): ", + test_num, test->descr); + + if (test->special_test) + return test->special_test(test_num); + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + + user_btf = malloc(raw_btf_size); + if (CHECK(!user_btf, "!user_btf")) { + err = -1; + goto done; + } + + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + if (CHECK(btf_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + user_btf_size = (int)raw_btf_size + test->btf_size_delta; + expected_nbytes = min(raw_btf_size, user_btf_size); + if (raw_btf_size > expected_nbytes) + memset(user_btf + expected_nbytes, 0xff, + raw_btf_size - expected_nbytes); + + info_len = sizeof(info); + info.btf = ptr_to_u64(user_btf); + info.btf_size = user_btf_size; + + ret = 0; + err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len); + if (CHECK(err || !info.id || info_len != sizeof(info) || + info.btf_size != raw_btf_size || + (ret = memcmp(raw_btf, user_btf, expected_nbytes)), + "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d", + err, errno, info.id, info_len, sizeof(info), + raw_btf_size, info.btf_size, expected_nbytes, ret)) { + err = -1; + goto done; + } + + while (expected_nbytes < raw_btf_size) { + fprintf(stderr, "%u...", expected_nbytes); + if (CHECK(user_btf[expected_nbytes++] != 0xff, + "user_btf[%u]:%x != 0xff", expected_nbytes - 1, + user_btf[expected_nbytes - 1])) { + err = -1; + goto done; + } + } + + fprintf(stderr, "OK"); + +done: + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + free(raw_btf); + free(user_btf); + + if (btf_fd != -1) + close(btf_fd); + + return err; +} + +static int test_get_info(void) +{ + unsigned int i; + int err = 0; + + if (args.get_info_test_num) + return count_result(do_test_get_info(args.get_info_test_num)); + + for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) + err |= count_result(do_test_get_info(i)); + + return err; +} + +struct btf_file_test { + const char *file; + bool btf_kv_notfound; +}; + +static struct btf_file_test file_tests[] = { +{ + .file = "test_btf_haskv.o", +}, +{ + .file = "test_btf_nokv.o", + .btf_kv_notfound = true, +}, +}; + +static int file_has_btf_elf(const char *fn) +{ + Elf_Scn *scn = NULL; + GElf_Ehdr ehdr; + int elf_fd; + Elf *elf; + int ret; + + if (CHECK(elf_version(EV_CURRENT) == EV_NONE, + "elf_version(EV_CURRENT) == EV_NONE")) + return -1; + + elf_fd = open(fn, O_RDONLY); + if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno)) + return -1; + + elf = elf_begin(elf_fd, ELF_C_READ, NULL); + if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) { + ret = -1; + goto done; + } + + if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) { + ret = -1; + goto done; + } + + while ((scn = elf_nextscn(elf, scn))) { + const char *sh_name; + GElf_Shdr sh; + + if (CHECK(gelf_getshdr(scn, &sh) != &sh, + "file:%s gelf_getshdr != &sh", fn)) { + ret = -1; + goto done; + } + + sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + if (!strcmp(sh_name, BTF_ELF_SEC)) { + ret = 1; + goto done; + } + } + + ret = 0; + +done: + close(elf_fd); + elf_end(elf); + return ret; +} + +static int do_test_file(unsigned int test_num) +{ + const struct btf_file_test *test = &file_tests[test_num - 1]; + struct bpf_object *obj = NULL; + struct bpf_program *prog; + struct bpf_map *map; + int err; + + fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, + test->file); + + err = file_has_btf_elf(test->file); + if (err == -1) + return err; + + if (err == 0) { + fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC); + skip_cnt++; + return 0; + } + + obj = bpf_object__open(test->file); + if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) + return PTR_ERR(obj); + + err = bpf_object__btf_fd(obj); + if (CHECK(err == -1, "bpf_object__btf_fd: -1")) + goto done; + + prog = bpf_program__next(NULL, obj); + if (CHECK(!prog, "Cannot find bpf_prog")) { + err = -1; + goto done; + } + + bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); + err = bpf_object__load(obj); + if (CHECK(err < 0, "bpf_object__load: %d", err)) + goto done; + + map = bpf_object__find_map_by_name(obj, "btf_map"); + if (CHECK(!map, "btf_map not found")) { + err = -1; + goto done; + } + + err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0) + != test->btf_kv_notfound; + if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u", + bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map), + test->btf_kv_notfound)) + goto done; + + fprintf(stderr, "OK"); + +done: + bpf_object__close(obj); + return err; +} + +static int test_file(void) +{ + unsigned int i; + int err = 0; + + if (args.file_test_num) + return count_result(do_test_file(args.file_test_num)); + + for (i = 1; i <= ARRAY_SIZE(file_tests); i++) + err |= count_result(do_test_file(i)); + + return err; +} + +const char *pprint_enum_str[] = { + "ENUM_ZERO", + "ENUM_ONE", + "ENUM_TWO", + "ENUM_THREE", +}; + +struct pprint_mapv { + uint32_t ui32; + uint16_t ui16; + /* 2 bytes hole */ + int32_t si32; + uint32_t unused_bits2a:2, + bits28:28, + unused_bits2b:2; + union { + uint64_t ui64; + uint8_t ui8a[8]; + }; + enum { + ENUM_ZERO, + ENUM_ONE, + ENUM_TWO, + ENUM_THREE, + } aenum; +}; + +static struct btf_raw_test pprint_test = { + .descr = "BTF pretty print test #1", + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + /* 2 bits */ /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 2, 2), + /* 28 bits */ /* [7] */ + BTF_TYPE_INT_ENC(0, 0, 0, 28, 4), + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 28), + BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 6, 96), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, 98), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ + BTF_END_RAW, + }, + .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum", + .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "pprint_test", + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}; + +static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i) +{ + v->ui32 = i; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; +} + +static int test_pprint(void) +{ + const struct btf_raw_test *test = &pprint_test; + struct bpf_create_map_attr create_attr = {}; + int map_fd = -1, btf_fd = -1; + struct pprint_mapv mapv = {}; + unsigned int raw_btf_size; + char expected_line[255]; + FILE *pin_file = NULL; + char pin_path[255]; + size_t line_len = 0; + char *line = NULL; + unsigned int key; + uint8_t *raw_btf; + ssize_t nread; + int err, ret; + + fprintf(stderr, "%s......", test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; + + map_fd = bpf_create_map_xattr(&create_attr); + if (CHECK(map_fd == -1, "errno:%d", errno)) { + err = -1; + goto done; + } + + ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", + "/sys/fs/bpf", test->map_name); + + if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", + "/sys/fs/bpf", test->map_name)) { + err = -1; + goto done; + } + + err = bpf_obj_pin(map_fd, pin_path); + if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno)) + goto done; + + for (key = 0; key < test->max_entries; key++) { + set_pprint_mapv(&mapv, key); + bpf_map_update_elem(map_fd, &key, &mapv, 0); + } + + pin_file = fopen(pin_path, "r"); + if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) { + err = -1; + goto done; + } + + /* Skip lines start with '#' */ + while ((nread = getline(&line, &line_len, pin_file)) > 0 && + *line == '#') + ; + + if (CHECK(nread <= 0, "Unexpected EOF")) { + err = -1; + goto done; + } + + key = 0; + do { + ssize_t nexpected_line; + + set_pprint_mapv(&mapv, key); + nexpected_line = snprintf(expected_line, sizeof(expected_line), + "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n", + key, + mapv.ui32, mapv.si32, + mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b, + mapv.ui64, + mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3], + mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7], + pprint_enum_str[mapv.aenum]); + + if (CHECK(nexpected_line == sizeof(expected_line), + "expected_line is too long")) { + err = -1; + goto done; + } + + if (strcmp(expected_line, line)) { + err = -1; + fprintf(stderr, "unexpected pprint output\n"); + fprintf(stderr, "expected: %s", expected_line); + fprintf(stderr, " read: %s", line); + goto done; + } + + nread = getline(&line, &line_len, pin_file); + } while (++key < test->max_entries && nread > 0); + + if (CHECK(key < test->max_entries, + "Unexpected EOF. key:%u test->max_entries:%u", + key, test->max_entries)) { + err = -1; + goto done; + } + + if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) { + err = -1; + goto done; + } + + err = 0; + +done: + if (!err) + fprintf(stderr, "OK"); + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + if (btf_fd != -1) + close(btf_fd); + if (map_fd != -1) + close(map_fd); + if (pin_file) + fclose(pin_file); + unlink(pin_path); + free(line); + + return err; +} + +static void usage(const char *cmd) +{ + fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n", + cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), + ARRAY_SIZE(file_tests)); +} + +static int parse_args(int argc, char **argv) +{ + const char *optstr = "lpf:r:g:"; + int opt; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'l': + args.always_log = true; + break; + case 'f': + args.file_test_num = atoi(optarg); + args.file_test = true; + break; + case 'r': + args.raw_test_num = atoi(optarg); + args.raw_test = true; + break; + case 'g': + args.get_info_test_num = atoi(optarg); + args.get_info_test = true; + break; + case 'p': + args.pprint_test = true; + break; + case 'h': + usage(argv[0]); + exit(0); + default: + usage(argv[0]); + return -1; + } + } + + if (args.raw_test_num && + (args.raw_test_num < 1 || + args.raw_test_num > ARRAY_SIZE(raw_tests))) { + fprintf(stderr, "BTF raw test number must be [1 - %zu]\n", + ARRAY_SIZE(raw_tests)); + return -1; + } + + if (args.file_test_num && + (args.file_test_num < 1 || + args.file_test_num > ARRAY_SIZE(file_tests))) { + fprintf(stderr, "BTF file test number must be [1 - %zu]\n", + ARRAY_SIZE(file_tests)); + return -1; + } + + if (args.get_info_test_num && + (args.get_info_test_num < 1 || + args.get_info_test_num > ARRAY_SIZE(get_info_tests))) { + fprintf(stderr, "BTF get info test number must be [1 - %zu]\n", + ARRAY_SIZE(get_info_tests)); + return -1; + } + + return 0; +} + +static void print_summary(void) +{ + fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n", + pass_cnt - skip_cnt, skip_cnt, error_cnt); +} + +int main(int argc, char **argv) +{ + int err = 0; + + err = parse_args(argc, argv); + if (err) + return err; + + if (args.always_log) + libbpf_set_print(__base_pr, __base_pr, __base_pr); + + if (args.raw_test) + err |= test_raw(); + + if (args.get_info_test) + err |= test_get_info(); + + if (args.file_test) + err |= test_file(); + + if (args.pprint_test) + err |= count_result(test_pprint()); + + if (args.raw_test || args.get_info_test || args.file_test || + args.pprint_test) + goto done; + + err |= test_raw(); + err |= test_get_info(); + err |= test_file(); + +done: + print_summary(); + return err; +} diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c new file mode 100644 index 000000000000..8c7ca096ecf2 --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf_haskv.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct ipv_counts { + unsigned int v4; + unsigned int v6; +}; + +typedef int btf_map_key; +typedef struct ipv_counts btf_map_value; +btf_map_key dumm_key; +btf_map_value dummy_value; + +struct bpf_map_def SEC("maps") btf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct ipv_counts), + .max_entries = 4, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + struct ipv_counts *counts; + int key = 0; + + if (!arg->sock) + return 0; + + counts = bpf_map_lookup_elem(&btf_map, &key); + if (!counts) + return 0; + + counts->v6++; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c new file mode 100644 index 000000000000..0ed8e088eebf --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf_nokv.c @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct ipv_counts { + unsigned int v4; + unsigned int v6; +}; + +struct bpf_map_def SEC("maps") btf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct ipv_counts), + .max_entries = 4, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + struct ipv_counts *counts; + int key = 0; + + if (!arg->sock) + return 0; + + counts = bpf_map_lookup_elem(&btf_map, &key); + if (!counts) + return 0; + + counts->v6++; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c new file mode 100644 index 000000000000..f6d9f238e00a --- /dev/null +++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* Permit pretty deep stack traces */ +#define MAX_STACK_RAWTP 100 +struct stack_trace_t { + int pid; + int kern_stack_size; + int user_stack_size; + int user_stack_buildid_size; + __u64 kern_stack[MAX_STACK_RAWTP]; + __u64 user_stack[MAX_STACK_RAWTP]; + struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; +}; + +struct bpf_map_def SEC("maps") perfmap = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") stackdata_map = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct stack_trace_t), + .max_entries = 1, +}; + +/* Allocate per-cpu space twice the needed. For the code below + * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); + * if (usize < 0) + * return 0; + * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); + * + * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64), + * verifier will complain that access "raw_data + usize" + * with size "max_len - usize" may be out of bound. + * The maximum "raw_data + usize" is "raw_data + max_len" + * and the maximum "max_len - usize" is "max_len", verifier + * concludes that the maximum buffer access range is + * "raw_data[0...max_len * 2 - 1]" and hence reject the program. + * + * Doubling the to-be-used max buffer size can fix this verifier + * issue and avoid complicated C programming massaging. + * This is an acceptable workaround since there is one entry here. + */ +struct bpf_map_def SEC("maps") rawdata_map = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2, + .max_entries = 1, +}; + +SEC("tracepoint/raw_syscalls/sys_enter") +int bpf_prog1(void *ctx) +{ + int max_len, max_buildid_len, usize, ksize, total_size; + struct stack_trace_t *data; + void *raw_data; + __u32 key = 0; + + data = bpf_map_lookup_elem(&stackdata_map, &key); + if (!data) + return 0; + + max_len = MAX_STACK_RAWTP * sizeof(__u64); + max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id); + data->pid = bpf_get_current_pid_tgid(); + data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack, + max_len, 0); + data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len, + BPF_F_USER_STACK); + data->user_stack_buildid_size = bpf_get_stack( + ctx, data->user_stack_buildid, max_buildid_len, + BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); + bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data)); + + /* write both kernel and user stacks to the same buffer */ + raw_data = bpf_map_lookup_elem(&rawdata_map, &key); + if (!raw_data) + return 0; + + usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); + if (usize < 0) + return 0; + + ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); + if (ksize < 0) + return 0; + + total_size = usize + ksize; + if (total_size > 0 && total_size <= max_len) + bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index 35669ccd4d23..9df0d2ac45f8 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -1,6 +1,15 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ "$(id -u)" != "0" ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + SRC_TREE=../../../../ test_run() diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh new file mode 100755 index 000000000000..677686198df3 --- /dev/null +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +modprobe rc-loopback + +for i in /sys/class/rc/rc* +do + if grep -q DRV_NAME=rc-loopback $i/uevent + then + LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q) + fi +done + +if [ -n $LIRCDEV ]; +then + TYPE=lirc_mode2 + ./test_lirc_mode2_user $LIRCDEV + ret=$? + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + else + echo -e ${GREEN}"PASS: $TYPE"${NC} + fi +fi diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c new file mode 100644 index 000000000000..ba26855563a5 --- /dev/null +++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// test ir decoder +// +// Copyright (C) 2018 Sean Young <sean@mess.org> + +#include <linux/bpf.h> +#include <linux/lirc.h> +#include "bpf_helpers.h" + +SEC("lirc_mode2") +int bpf_decoder(unsigned int *sample) +{ + if (LIRC_IS_PULSE(*sample)) { + unsigned int duration = LIRC_VALUE(*sample); + + if (duration & 0x10000) + bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c new file mode 100644 index 000000000000..d470d63c33db --- /dev/null +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +// test ir decoder +// +// Copyright (C) 2018 Sean Young <sean@mess.org> + +// A lirc chardev is a device representing a consumer IR (cir) device which +// can receive infrared signals from remote control and/or transmit IR. +// +// IR is sent as a series of pulses and space somewhat like morse code. The +// BPF program can decode this into scancodes so that rc-core can translate +// this into input key codes using the rc keymap. +// +// This test works by sending IR over rc-loopback, so the IR is processed by +// BPF and then decoded into scancodes. The lirc chardev must be the one +// associated with rc-loopback, see the output of ir-keytable(1). +// +// The following CONFIG options must be enabled for the test to succeed: +// CONFIG_RC_CORE=y +// CONFIG_BPF_RAWIR_EVENT=y +// CONFIG_RC_LOOPBACK=y + +// Steps: +// 1. Open the /dev/lircN device for rc-loopback (given on command line) +// 2. Attach bpf_lirc_mode2 program which decodes some IR. +// 3. Send some IR to the same IR device; since it is loopback, this will +// end up in the bpf program +// 4. bpf program should decode IR and report keycode +// 5. We can read keycode from same /dev/lirc device + +#include <linux/bpf.h> +#include <linux/lirc.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <poll.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "bpf_util.h" +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +int main(int argc, char **argv) +{ + struct bpf_object *obj; + int ret, lircfd, progfd, mode; + int testir = 0x1dead; + u32 prog_ids[10], prog_flags[10], prog_cnt; + + if (argc != 2) { + printf("Usage: %s /dev/lircN\n", argv[0]); + return 2; + } + + ret = bpf_prog_load("test_lirc_mode2_kern.o", + BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd); + if (ret) { + printf("Failed to load bpf program\n"); + return 1; + } + + lircfd = open(argv[1], O_RDWR | O_NONBLOCK); + if (lircfd == -1) { + printf("failed to open lirc device %s: %m\n", argv[1]); + return 1; + } + + /* Let's try detach it before it was ever attached */ + ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2); + if (ret != -1 || errno != ENOENT) { + printf("bpf_prog_detach2 not attached should fail: %m\n"); + return 1; + } + + mode = LIRC_MODE_SCANCODE; + if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) { + printf("failed to set rec mode: %m\n"); + return 1; + } + + prog_cnt = 10; + ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids, + &prog_cnt); + if (ret) { + printf("Failed to query bpf programs on lirc device: %m\n"); + return 1; + } + + if (prog_cnt != 0) { + printf("Expected nothing to be attached\n"); + return 1; + } + + ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0); + if (ret) { + printf("Failed to attach bpf to lirc device: %m\n"); + return 1; + } + + /* Write raw IR */ + ret = write(lircfd, &testir, sizeof(testir)); + if (ret != sizeof(testir)) { + printf("Failed to send test IR message: %m\n"); + return 1; + } + + struct pollfd pfd = { .fd = lircfd, .events = POLLIN }; + struct lirc_scancode lsc; + + poll(&pfd, 1, 100); + + /* Read decoded IR */ + ret = read(lircfd, &lsc, sizeof(lsc)); + if (ret != sizeof(lsc)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (lsc.scancode != 0xdead || lsc.rc_proto != 64) { + printf("Incorrect scancode decoded\n"); + return 1; + } + + prog_cnt = 10; + ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids, + &prog_cnt); + if (ret) { + printf("Failed to query bpf programs on lirc device: %m\n"); + return 1; + } + + if (prog_cnt != 1) { + printf("Expected one program to be attached\n"); + return 1; + } + + /* Let's try detaching it now it is actually attached */ + ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2); + if (ret) { + printf("bpf_prog_detach2: returned %m\n"); + return 1; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c new file mode 100644 index 000000000000..0575751bc1bc --- /dev/null +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c @@ -0,0 +1,437 @@ +#include <stddef.h> +#include <inttypes.h> +#include <errno.h> +#include <linux/seg6_local.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* Packet parsing state machine helpers. */ +#define cursor_advance(_cursor, _len) \ + ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) + +#define SR6_FLAG_ALERT (1 << 4) + +#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ + 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) +#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ + 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) +#define BPF_PACKET_HEADER __attribute__((packed)) + +struct ip6_t { + unsigned int ver:4; + unsigned int priority:8; + unsigned int flow_label:20; + unsigned short payload_len; + unsigned char next_header; + unsigned char hop_limit; + unsigned long long src_hi; + unsigned long long src_lo; + unsigned long long dst_hi; + unsigned long long dst_lo; +} BPF_PACKET_HEADER; + +struct ip6_addr_t { + unsigned long long hi; + unsigned long long lo; +} BPF_PACKET_HEADER; + +struct ip6_srh_t { + unsigned char nexthdr; + unsigned char hdrlen; + unsigned char type; + unsigned char segments_left; + unsigned char first_segment; + unsigned char flags; + unsigned short tag; + + struct ip6_addr_t segments[0]; +} BPF_PACKET_HEADER; + +struct sr6_tlv_t { + unsigned char type; + unsigned char len; + unsigned char value[0]; +} BPF_PACKET_HEADER; + +__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) +{ + void *cursor, *data_end; + struct ip6_srh_t *srh; + struct ip6_t *ip; + uint8_t *ipver; + + data_end = (void *)(long)skb->data_end; + cursor = (void *)(long)skb->data; + ipver = (uint8_t *)cursor; + + if ((void *)ipver + sizeof(*ipver) > data_end) + return NULL; + + if ((*ipver >> 4) != 6) + return NULL; + + ip = cursor_advance(cursor, sizeof(*ip)); + if ((void *)ip + sizeof(*ip) > data_end) + return NULL; + + if (ip->next_header != 43) + return NULL; + + srh = cursor_advance(cursor, sizeof(*srh)); + if ((void *)srh + sizeof(*srh) > data_end) + return NULL; + + if (srh->type != 4) + return NULL; + + return srh; +} + +__attribute__((always_inline)) +int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, + uint32_t old_pad, uint32_t pad_off) +{ + int err; + + if (new_pad != old_pad) { + err = bpf_lwt_seg6_adjust_srh(skb, pad_off, + (int) new_pad - (int) old_pad); + if (err) + return err; + } + + if (new_pad > 0) { + char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0}; + struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; + + pad_tlv->type = SR6_TLV_PADDING; + pad_tlv->len = new_pad - 2; + + err = bpf_lwt_seg6_store_bytes(skb, pad_off, + (void *)pad_tlv_buf, new_pad); + if (err) + return err; + } + + return 0; +} + +__attribute__((always_inline)) +int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, + uint32_t *tlv_off, uint32_t *pad_size, + uint32_t *pad_off) +{ + uint32_t srh_off, cur_off; + int offset_valid = 0; + int err; + + srh_off = (char *)srh - (char *)(long)skb->data; + // cur_off = end of segments, start of possible TLVs + cur_off = srh_off + sizeof(*srh) + + sizeof(struct ip6_addr_t) * (srh->first_segment + 1); + + *pad_off = 0; + + // we can only go as far as ~10 TLVs due to the BPF max stack size + #pragma clang loop unroll(full) + for (int i = 0; i < 10; i++) { + struct sr6_tlv_t tlv; + + if (cur_off == *tlv_off) + offset_valid = 1; + + if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) + break; + + err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); + if (err) + return err; + + if (tlv.type == SR6_TLV_PADDING) { + *pad_size = tlv.len + sizeof(tlv); + *pad_off = cur_off; + + if (*tlv_off == srh_off) { + *tlv_off = cur_off; + offset_valid = 1; + } + break; + + } else if (tlv.type == SR6_TLV_HMAC) { + break; + } + + cur_off += sizeof(tlv) + tlv.len; + } // we reached the padding or HMAC TLVs, or the end of the SRH + + if (*pad_off == 0) + *pad_off = cur_off; + + if (*tlv_off == -1) + *tlv_off = cur_off; + else if (!offset_valid) + return -EINVAL; + + return 0; +} + +__attribute__((always_inline)) +int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, + struct sr6_tlv_t *itlv, uint8_t tlv_size) +{ + uint32_t srh_off = (char *)srh - (char *)(long)skb->data; + uint8_t len_remaining, new_pad; + uint32_t pad_off = 0; + uint32_t pad_size = 0; + uint32_t partial_srh_len; + int err; + + if (tlv_off != -1) + tlv_off += srh_off; + + if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) + return -EINVAL; + + err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); + if (err) + return err; + + err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); + if (err) + return err; + + err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); + if (err) + return err; + + // the following can't be moved inside update_tlv_pad because the + // bpf verifier has some issues with it + pad_off += sizeof(*itlv) + itlv->len; + partial_srh_len = pad_off - srh_off; + len_remaining = partial_srh_len % 8; + new_pad = 8 - len_remaining; + + if (new_pad == 1) // cannot pad for 1 byte only + new_pad = 9; + else if (new_pad == 8) + new_pad = 0; + + return update_tlv_pad(skb, new_pad, pad_size, pad_off); +} + +__attribute__((always_inline)) +int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, + uint32_t tlv_off) +{ + uint32_t srh_off = (char *)srh - (char *)(long)skb->data; + uint8_t len_remaining, new_pad; + uint32_t partial_srh_len; + uint32_t pad_off = 0; + uint32_t pad_size = 0; + struct sr6_tlv_t tlv; + int err; + + tlv_off += srh_off; + + err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); + if (err) + return err; + + err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv)); + if (err) + return err; + + err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len)); + if (err) + return err; + + pad_off -= sizeof(tlv) + tlv.len; + partial_srh_len = pad_off - srh_off; + len_remaining = partial_srh_len % 8; + new_pad = 8 - len_remaining; + if (new_pad == 1) // cannot pad for 1 byte only + new_pad = 9; + else if (new_pad == 8) + new_pad = 0; + + return update_tlv_pad(skb, new_pad, pad_size, pad_off); +} + +__attribute__((always_inline)) +int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) +{ + int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + + ((srh->first_segment + 1) << 4); + struct sr6_tlv_t tlv; + + if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t))) + return 0; + + if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) { + struct ip6_addr_t egr_addr; + + if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16)) + return 0; + + // check if egress TLV value is correct + if (ntohll(egr_addr.hi) == 0xfd00000000000000 && + ntohll(egr_addr.lo) == 0x4) + return 1; + } + + return 0; +} + +// This function will push a SRH with segments fd00::1, fd00::2, fd00::3, +// fd00::4 +SEC("encap_srh") +int __encap_srh(struct __sk_buff *skb) +{ + unsigned long long hi = 0xfd00000000000000; + struct ip6_addr_t *seg; + struct ip6_srh_t *srh; + char srh_buf[72]; // room for 4 segments + int err; + + srh = (struct ip6_srh_t *)srh_buf; + srh->nexthdr = 0; + srh->hdrlen = 8; + srh->type = 4; + srh->segments_left = 3; + srh->first_segment = 3; + srh->flags = 0; + srh->tag = 0; + + seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); + + #pragma clang loop unroll(full) + for (unsigned long long lo = 0; lo < 4; lo++) { + seg->lo = htonll(4 - lo); + seg->hi = htonll(hi); + seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); + } + + err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf)); + if (err) + return BPF_DROP; + + return BPF_REDIRECT; +} + +// Add an Egress TLV fc00::4, add the flag A, +// and apply End.X action to fc42::1 +SEC("add_egr_x") +int __add_egr_x(struct __sk_buff *skb) +{ + unsigned long long hi = 0xfc42000000000000; + unsigned long long lo = 0x1; + struct ip6_srh_t *srh = get_srh(skb); + uint8_t new_flags = SR6_FLAG_ALERT; + struct ip6_addr_t addr; + int err, offset; + + if (srh == NULL) + return BPF_DROP; + + uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; + + err = add_tlv(skb, srh, (srh->hdrlen+1) << 3, + (struct sr6_tlv_t *)&tlv, 20); + if (err) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); + err = bpf_lwt_seg6_store_bytes(skb, offset, + (void *)&new_flags, sizeof(new_flags)); + if (err) + return BPF_DROP; + + addr.lo = htonll(lo); + addr.hi = htonll(hi); + err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, + (void *)&addr, sizeof(addr)); + if (err) + return BPF_DROP; + return BPF_REDIRECT; +} + +// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a +// simple End action +SEC("pop_egr") +int __pop_egr(struct __sk_buff *skb) +{ + struct ip6_srh_t *srh = get_srh(skb); + uint16_t new_tag = bpf_htons(2442); + uint8_t new_flags = 0; + int err, offset; + + if (srh == NULL) + return BPF_DROP; + + if (srh->flags != SR6_FLAG_ALERT) + return BPF_DROP; + + if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV + return BPF_DROP; + + if (!has_egr_tlv(skb, srh)) + return BPF_DROP; + + err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16); + if (err) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); + if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags, + sizeof(new_flags))) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag); + if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag, + sizeof(new_tag))) + return BPF_DROP; + + return BPF_OK; +} + +// Inspect if the Egress TLV and flag have been removed, if the tag is correct, +// then apply a End.T action to reach the last segment +SEC("inspect_t") +int __inspect_t(struct __sk_buff *skb) +{ + struct ip6_srh_t *srh = get_srh(skb); + int table = 117; + int err; + + if (srh == NULL) + return BPF_DROP; + + if (srh->flags != 0) + return BPF_DROP; + + if (srh->tag != bpf_htons(2442)) + return BPF_DROP; + + if (srh->hdrlen != 8) // 4 segments + return BPF_DROP; + + err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T, + (void *)&table, sizeof(table)); + + if (err) + return BPF_DROP; + + return BPF_REDIRECT; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh new file mode 100755 index 000000000000..270fa8f49573 --- /dev/null +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -0,0 +1,149 @@ +#!/bin/bash +# Connects 6 network namespaces through veths. +# Each NS may have different IPv6 global scope addresses : +# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6 +# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6 +# fc42::1 fd00::4 +# +# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a +# IPv6 header with a Segment Routing Header, with segments : +# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4 +# +# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions : +# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1 +# - fd00::2 : remove the TLV, change the flags, add a tag +# - fd00::3 : apply an End.T action to fd00::4, through routing table 117 +# +# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet. +# Each End.BPF action will validate the operations applied on the SRH by the +# previous BPF program in the chain, otherwise the packet is dropped. +# +# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this +# datagram can be read on NS6 when binding to fb00::6. + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + +TMP_FILE="/tmp/selftest_lwt_seg6local.txt" + +cleanup() +{ + if [ "$?" = "0" ]; then + echo "selftests: test_lwt_seg6local [PASS]"; + else + echo "selftests: test_lwt_seg6local [FAILED]"; + fi + + set +e + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null + ip netns del ns3 2> /dev/null + ip netns del ns4 2> /dev/null + ip netns del ns5 2> /dev/null + ip netns del ns6 2> /dev/null + rm -f $TMP_FILE +} + +set -e + +ip netns add ns1 +ip netns add ns2 +ip netns add ns3 +ip netns add ns4 +ip netns add ns5 +ip netns add ns6 + +trap cleanup 0 2 3 6 9 + +ip link add veth1 type veth peer name veth2 +ip link add veth3 type veth peer name veth4 +ip link add veth5 type veth peer name veth6 +ip link add veth7 type veth peer name veth8 +ip link add veth9 type veth peer name veth10 + +ip link set veth1 netns ns1 +ip link set veth2 netns ns2 +ip link set veth3 netns ns2 +ip link set veth4 netns ns3 +ip link set veth5 netns ns3 +ip link set veth6 netns ns4 +ip link set veth7 netns ns4 +ip link set veth8 netns ns5 +ip link set veth9 netns ns5 +ip link set veth10 netns ns6 + +ip netns exec ns1 ip link set dev veth1 up +ip netns exec ns2 ip link set dev veth2 up +ip netns exec ns2 ip link set dev veth3 up +ip netns exec ns3 ip link set dev veth4 up +ip netns exec ns3 ip link set dev veth5 up +ip netns exec ns4 ip link set dev veth6 up +ip netns exec ns4 ip link set dev veth7 up +ip netns exec ns5 ip link set dev veth8 up +ip netns exec ns5 ip link set dev veth9 up +ip netns exec ns6 ip link set dev veth10 up +ip netns exec ns6 ip link set dev lo up + +# All link scope addresses and routes required between veths +ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link +ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link +ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link +ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link +ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link +ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link +ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link +ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link +ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link +ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link +ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link +ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link +ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link +ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link +ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link +ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link + +ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo +ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21 + +ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link + +ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65 +ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4 + +ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ns4 ip -6 addr add fc42::1 dev lo +ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87 + +ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 +ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8 + +ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo +ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo + +ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + +ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null +ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null +ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null + +ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE & +ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" +sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment +kill -INT $! + +if [[ $(< $TMP_FILE) != "foobar" ]]; then + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index e78aad0a68bb..be800d0e7a84 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -163,6 +163,10 @@ def bpftool(args, JSON=True, ns="", fail=True): def bpftool_prog_list(expected=None, ns=""): _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) + # Remove the base progs + for p in base_progs: + if p in progs: + progs.remove(p) if expected is not None: if len(progs) != expected: fail(True, "%d BPF programs loaded, expected %d" % @@ -171,6 +175,10 @@ def bpftool_prog_list(expected=None, ns=""): def bpftool_map_list(expected=None, ns=""): _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + # Remove the base maps + for m in base_maps: + if m in maps: + maps.remove(m) if expected is not None: if len(maps) != expected: fail(True, "%d BPF maps loaded, expected %d" % @@ -585,8 +593,8 @@ skip(os.getuid() != 0, "test must be run as root") # Check tools ret, progs = bpftool("prog", fail=False) skip(ret != 0, "bpftool not installed") -# Check no BPF programs are loaded -skip(len(progs) != 0, "BPF programs already loaded on the system") +base_progs = progs +_, base_maps = bpftool("map") # Check netdevsim ret, out = cmd("modprobe netdevsim", fail=False) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 4123d0ab90ba..0ef68204c84b 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -38,8 +38,10 @@ typedef __u16 __sum16; #include "bpf_util.h" #include "bpf_endian.h" #include "bpf_rlimit.h" +#include "trace_helpers.h" static int error_cnt, pass_cnt; +static bool jit_enabled; #define MAGIC_BYTES 123 @@ -166,6 +168,37 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_tail(void) +{ + const char *file = "./test_adjust_tail.o"; + struct bpf_object *obj; + char buf[128]; + __u32 duration, retval, size; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + + CHECK(err || errno || retval != XDP_DROP, + "ipv4", "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != XDP_TX || size != 54, + "ipv6", "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + bpf_object__close(obj); +} + + + #define MAGIC_VAL 0x1234 #define NUM_ITER 100000 #define VIP_NUM 5 @@ -360,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +static bool is_jit_enabled(void) +{ + const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + bool enabled = false; + int sysctl_fd; + + sysctl_fd = open(jit_sysctl, 0, O_RDONLY); + if (sysctl_fd != -1) { + char tmpc; + + if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) + enabled = (tmpc != '0'); + close(sysctl_fd); + } + + return enabled; +} + static void test_bpf_obj_id(void) { const __u64 array_magic_value = 0xfaceb00c; const __u32 array_key = 0; const int nr_iters = 2; const char *file = "./test_obj_id.o"; - const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; const char *expected_prog_name = "test_obj_id"; const char *expected_map_name = "test_map_id"; const __u64 nsec_per_sec = 1000000000; @@ -383,20 +433,11 @@ static void test_bpf_obj_id(void) char jited_insns[128], xlated_insns[128], zeros[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; struct timespec real_time_ts, boot_time_ts; - int sysctl_fd, jit_enabled = 0, err = 0; + int err = 0; __u64 array_value; uid_t my_uid = getuid(); time_t now, load_time; - sysctl_fd = open(jit_sysctl, 0, O_RDONLY); - if (sysctl_fd != -1) { - char tmpc; - - if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) - jit_enabled = (tmpc != '0'); - close(sysctl_fd); - } - err = bpf_prog_get_fd_by_id(0); CHECK(err >= 0 || errno != ENOENT, "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); @@ -865,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd) return 0; } +static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len) +{ + __u32 key, next_key, *cur_key_p, *next_key_p; + char *val_buf1, *val_buf2; + int i, err = 0; + + val_buf1 = malloc(stack_trace_len); + val_buf2 = malloc(stack_trace_len); + cur_key_p = NULL; + next_key_p = &key; + while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) { + err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1); + if (err) + goto out; + err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2); + if (err) + goto out; + for (i = 0; i < stack_trace_len; i++) { + if (val_buf1[i] != val_buf2[i]) { + err = -1; + goto out; + } + } + key = *next_key_p; + cur_key_p = &key; + next_key_p = &next_key; + } + if (errno != ENOENT) + err = -1; + +out: + free(val_buf1); + free(val_buf2); + return err; +} + static void test_stacktrace_map() { - int control_map_fd, stackid_hmap_fd, stackmap_fd; + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; const char *file = "./test_stacktrace_map.o"; - int bytes, efd, err, pmu_fd, prog_fd; + int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; struct perf_event_attr attr = {}; __u32 key, val, duration = 0; struct bpf_object *obj; @@ -925,6 +1002,10 @@ static void test_stacktrace_map() if (stackmap_fd < 0) goto disable_pmu; + stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); + if (stack_amap_fd < 0) + goto disable_pmu; + /* give some time for bpf program run */ sleep(1); @@ -946,6 +1027,12 @@ static void test_stacktrace_map() "err %d errno %d\n", err, errno)) goto disable_pmu_noerr; + stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); + err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); + if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", + "err %d errno %d\n", err, errno)) + goto disable_pmu_noerr; + goto disable_pmu_noerr; disable_pmu: error_cnt++; @@ -1039,9 +1126,9 @@ err: static void test_stacktrace_build_id(void) { - int control_map_fd, stackid_hmap_fd, stackmap_fd; + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; const char *file = "./test_stacktrace_build_id.o"; - int bytes, efd, err, pmu_fd, prog_fd; + int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; struct perf_event_attr attr = {}; __u32 key, previous_key, val, duration = 0; struct bpf_object *obj; @@ -1106,6 +1193,11 @@ static void test_stacktrace_build_id(void) err, errno)) goto disable_pmu; + stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); + if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); assert(system("./urandom_read") == 0); @@ -1157,8 +1249,15 @@ static void test_stacktrace_build_id(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); - CHECK(build_id_matches < 1, "build id match", - "Didn't find expected build ID from the map\n"); + if (CHECK(build_id_matches < 1, "build id match", + "Didn't find expected build ID from the map\n")) + goto disable_pmu; + + stack_trace_len = PERF_MAX_STACK_DEPTH + * sizeof(struct bpf_stack_build_id); + err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); + CHECK(err, "compare_stack_ips stackmap vs. stack_amap", + "err %d errno %d\n", err, errno); disable_pmu: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); @@ -1173,10 +1272,439 @@ out: return; } +static void test_stacktrace_build_id_nmi(void) +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; + const char *file = "./test_stacktrace_build_id.o"; + int err, pmu_fd, prog_fd; + struct perf_event_attr attr = { + .sample_freq = 5000, + .freq = 1, + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + __u32 key, previous_key, val, duration = 0; + struct bpf_object *obj; + char buf[256]; + int i, j; + struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; + int build_id_matches = 0; + + err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + return; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd < 0, "perf_event_open", + "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", + pmu_fd, errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (CHECK(control_map_fd < 0, "bpf_find_map control_map", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); + if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") + == 0); + assert(system("taskset 0x1 ./urandom_read 100000") == 0); + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = extract_build_id(buf, 256); + + if (CHECK(err, "get build_id with readelf", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = bpf_map_get_next_key(stackmap_fd, NULL, &key); + if (CHECK(err, "get_next_key from stackmap", + "err %d, errno %d\n", err, errno)) + goto disable_pmu; + + do { + char build_id[64]; + + err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); + if (CHECK(err, "lookup_elem from stackmap", + "err %d, errno %d\n", err, errno)) + goto disable_pmu; + for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) + if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && + id_offs[i].offset != 0) { + for (j = 0; j < 20; ++j) + sprintf(build_id + 2 * j, "%02x", + id_offs[i].build_id[j] & 0xff); + if (strstr(buf, build_id) != NULL) + build_id_matches = 1; + } + previous_key = key; + } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + + if (CHECK(build_id_matches < 1, "build id match", + "Didn't find expected build ID from the map\n")) + goto disable_pmu; + + /* + * We intentionally skip compare_stack_ips(). This is because we + * only support one in_nmi() ips-to-build_id translation per cpu + * at any time, thus stack_amap here will always fallback to + * BPF_STACK_BUILD_ID_IP; + */ + +disable_pmu: + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + +close_pmu: + close(pmu_fd); + +close_prog: + bpf_object__close(obj); +} + +#define MAX_CNT_RAWTP 10ull +#define MAX_STACK_RAWTP 100 +struct get_stack_trace_t { + int pid; + int kern_stack_size; + int user_stack_size; + int user_stack_buildid_size; + __u64 kern_stack[MAX_STACK_RAWTP]; + __u64 user_stack[MAX_STACK_RAWTP]; + struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; +}; + +static int get_stack_print_output(void *data, int size) +{ + bool good_kern_stack = false, good_user_stack = false; + const char *nonjit_func = "___bpf_prog_run"; + struct get_stack_trace_t *e = data; + int i, num_stack; + static __u64 cnt; + struct ksym *ks; + + cnt++; + + if (size < sizeof(struct get_stack_trace_t)) { + __u64 *raw_data = data; + bool found = false; + + num_stack = size / sizeof(__u64); + /* If jit is enabled, we do not have a good way to + * verify the sanity of the kernel stack. So we + * just assume it is good if the stack is not empty. + * This could be improved in the future. + */ + if (jit_enabled) { + found = num_stack > 0; + } else { + for (i = 0; i < num_stack; i++) { + ks = ksym_search(raw_data[i]); + if (strcmp(ks->name, nonjit_func) == 0) { + found = true; + break; + } + } + } + if (found) { + good_kern_stack = true; + good_user_stack = true; + } + } else { + num_stack = e->kern_stack_size / sizeof(__u64); + if (jit_enabled) { + good_kern_stack = num_stack > 0; + } else { + for (i = 0; i < num_stack; i++) { + ks = ksym_search(e->kern_stack[i]); + if (strcmp(ks->name, nonjit_func) == 0) { + good_kern_stack = true; + break; + } + } + } + if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) + good_user_stack = true; + } + if (!good_kern_stack || !good_user_stack) + return LIBBPF_PERF_EVENT_ERROR; + + if (cnt == MAX_CNT_RAWTP) + return LIBBPF_PERF_EVENT_DONE; + + return LIBBPF_PERF_EVENT_CONT; +} + +static void test_get_stack_raw_tp(void) +{ + const char *file = "./test_get_stack_rawtp.o"; + int i, efd, err, prog_fd, pmu_fd, perfmap_fd; + struct perf_event_attr attr = {}; + struct timespec tv = {0, 10}; + __u32 key = 0, duration = 0; + struct bpf_object *obj; + + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + + efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); + if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + perfmap_fd = bpf_find_map(__func__, obj, "perfmap"); + if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n", + perfmap_fd, errno)) + goto close_prog; + + err = load_kallsyms(); + if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno)) + goto close_prog; + + attr.sample_type = PERF_SAMPLE_RAW; + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_BPF_OUTPUT; + pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/, + -1/*group_fd*/, 0); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, + errno)) + goto close_prog; + + err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY); + if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err, + errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n", + err, errno)) + goto close_prog; + + err = perf_event_mmap(pmu_fd); + if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno)) + goto close_prog; + + /* trigger some syscall action */ + for (i = 0; i < MAX_CNT_RAWTP; i++) + nanosleep(&tv, NULL); + + err = perf_event_poller(pmu_fd, get_stack_print_output); + if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno)) + goto close_prog; + + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + +static void test_task_fd_query_rawtp(void) +{ + const char *file = "./test_get_stack_rawtp.o"; + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + struct bpf_object *obj; + int efd, err, prog_fd; + __u32 duration = 0; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + + efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); + if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + /* query (getpid(), efd) */ + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err, + errno)) + goto close_prog; + + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + strcmp(buf, "sys_enter") == 0; + if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n", + fd_type, buf)) + goto close_prog; + + /* test zero len */ + len = 0; + err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n", + err, errno)) + goto close_prog; + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + len == strlen("sys_enter"); + if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) + goto close_prog; + + /* test empty buffer */ + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n", + err, errno)) + goto close_prog; + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + len == strlen("sys_enter"); + if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) + goto close_prog; + + /* test smaller buffer */ + len = 3; + err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)", + "err %d errno %d\n", err, errno)) + goto close_prog; + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + len == strlen("sys_enter") && + strcmp(buf, "sy") == 0; + if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) + goto close_prog; + + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + +static void test_task_fd_query_tp_core(const char *probe_name, + const char *tp_name) +{ + const char *file = "./test_tracepoint.o"; + int err, bytes, efd, prog_fd, pmu_fd; + struct perf_event_attr attr = {}; + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + struct bpf_object *obj; + __u32 duration = 0; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) + goto close_prog; + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read", + "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + /* query (getpid(), pmu_fd) */ + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name); + if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n", + fd_type, buf)) + goto close_pmu; + + close(pmu_fd); + goto close_prog_noerr; + +close_pmu: + close(pmu_fd); +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + +static void test_task_fd_query_tp(void) +{ + test_task_fd_query_tp_core("sched/sched_switch", + "sched_switch"); + test_task_fd_query_tp_core("syscalls/sys_enter_read", + "sys_enter_read"); +} + int main(void) { + jit_enabled = is_jit_enabled(); + test_pkt_access(); test_xdp(); + test_xdp_adjust_tail(); test_l4lb_all(); test_xdp_noinline(); test_tcp_estats(); @@ -1186,7 +1714,11 @@ int main(void) test_tp_attach_query(); test_stacktrace_map(); test_stacktrace_build_id(); + test_stacktrace_build_id_nmi(); test_stacktrace_map_raw_tp(); + test_get_stack_raw_tp(); + test_task_fd_query_rawtp(); + test_task_fd_query_tp(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 2950f80ba7fb..a5e76b9219b9 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -1,12 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Facebook +#define _GNU_SOURCE + #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <arpa/inet.h> +#include <netinet/in.h> #include <sys/types.h> +#include <sys/select.h> #include <sys/socket.h> #include <linux/filter.h> @@ -17,34 +21,465 @@ #include "cgroup_helpers.h" #include "bpf_rlimit.h" +#ifndef ENOTSUPP +# define ENOTSUPP 524 +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + #define CG_PATH "/foo" #define CONNECT4_PROG_PATH "./connect4_prog.o" #define CONNECT6_PROG_PATH "./connect6_prog.o" +#define SENDMSG4_PROG_PATH "./sendmsg4_prog.o" +#define SENDMSG6_PROG_PATH "./sendmsg6_prog.o" #define SERV4_IP "192.168.1.254" #define SERV4_REWRITE_IP "127.0.0.1" +#define SRC4_IP "172.16.0.1" +#define SRC4_REWRITE_IP "127.0.0.4" #define SERV4_PORT 4040 #define SERV4_REWRITE_PORT 4444 #define SERV6_IP "face:b00c:1234:5678::abcd" #define SERV6_REWRITE_IP "::1" +#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4" +#define SRC6_IP "::1" +#define SRC6_REWRITE_IP "::6" #define SERV6_PORT 6060 #define SERV6_REWRITE_PORT 6666 #define INET_NTOP_BUF 40 -typedef int (*load_fn)(enum bpf_attach_type, const char *comment); +struct sock_addr_test; + +typedef int (*load_fn)(const struct sock_addr_test *test); typedef int (*info_fn)(int, struct sockaddr *, socklen_t *); -struct program { - enum bpf_attach_type type; - load_fn loadfn; - int fd; - const char *name; - enum bpf_attach_type invalid_type; +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +struct sock_addr_test { + const char *descr; + /* BPF prog properties */ + load_fn loadfn; + enum bpf_attach_type expected_attach_type; + enum bpf_attach_type attach_type; + /* Socket properties */ + int domain; + int type; + /* IP:port pairs for BPF prog to override */ + const char *requested_ip; + unsigned short requested_port; + const char *expected_ip; + unsigned short expected_port; + const char *expected_src_ip; + /* Expected test result */ + enum { + LOAD_REJECT, + ATTACH_REJECT, + SYSCALL_EPERM, + SYSCALL_ENOTSUPP, + SUCCESS, + } expected_result; }; -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static int bind4_prog_load(const struct sock_addr_test *test); +static int bind6_prog_load(const struct sock_addr_test *test); +static int connect4_prog_load(const struct sock_addr_test *test); +static int connect6_prog_load(const struct sock_addr_test *test); +static int sendmsg_deny_prog_load(const struct sock_addr_test *test); +static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test); +static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test); +static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test); +static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test); +static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test); + +static struct sock_addr_test tests[] = { + /* bind */ + { + "bind4: load prog with wrong expected attach type", + bind4_prog_load, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_BIND, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + "bind4: attach prog with wrong attach type", + bind4_prog_load, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + "bind4: rewrite IP & TCP port in", + bind4_prog_load, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET4_BIND, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + "bind4: rewrite IP & UDP port in", + bind4_prog_load, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET4_BIND, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + "bind6: load prog with wrong expected attach type", + bind6_prog_load, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + AF_INET6, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + "bind6: attach prog with wrong attach type", + bind6_prog_load, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_BIND, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + "bind6: rewrite IP & TCP port in", + bind6_prog_load, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET6_BIND, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + "bind6: rewrite IP & UDP port in", + bind6_prog_load, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET6_BIND, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SUCCESS, + }, + + /* connect */ + { + "connect4: load prog with wrong expected attach type", + connect4_prog_load, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET4_CONNECT, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + "connect4: attach prog with wrong attach type", + connect4_prog_load, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + "connect4: rewrite IP & TCP port", + connect4_prog_load, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET4_CONNECT, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + "connect4: rewrite IP & UDP port", + connect4_prog_load, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET4_CONNECT, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + "connect6: load prog with wrong expected attach type", + connect6_prog_load, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + AF_INET6, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + "connect6: attach prog with wrong attach type", + connect6_prog_load, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET4_CONNECT, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + "connect6: rewrite IP & TCP port", + connect6_prog_load, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET6_CONNECT, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + "connect6: rewrite IP & UDP port", + connect6_prog_load, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET6_CONNECT, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + + /* sendmsg */ + { + "sendmsg4: load prog with wrong expected attach type", + sendmsg4_rw_asm_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP4_SENDMSG, + AF_INET, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + "sendmsg4: attach prog with wrong attach type", + sendmsg4_rw_asm_prog_load, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + "sendmsg4: rewrite IP & port (asm)", + sendmsg4_rw_asm_prog_load, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP4_SENDMSG, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + "sendmsg4: rewrite IP & port (C)", + sendmsg4_rw_c_prog_load, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP4_SENDMSG, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + "sendmsg4: deny call", + sendmsg_deny_prog_load, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP4_SENDMSG, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + "sendmsg6: load prog with wrong expected attach type", + sendmsg6_rw_asm_prog_load, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + "sendmsg6: attach prog with wrong attach type", + sendmsg6_rw_asm_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP4_SENDMSG, + AF_INET6, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + "sendmsg6: rewrite IP & port (asm)", + sendmsg6_rw_asm_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + "sendmsg6: rewrite IP & port (C)", + sendmsg6_rw_c_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + "sendmsg6: IPv4-mapped IPv6", + sendmsg6_rw_v4mapped_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_ENOTSUPP, + }, + { + "sendmsg6: deny call", + sendmsg_deny_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, +}; static int mk_sockaddr(int domain, const char *ip, unsigned short port, struct sockaddr *addr, socklen_t addr_len) @@ -84,25 +519,23 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port, return 0; } -static int load_insns(enum bpf_attach_type attach_type, - const struct bpf_insn *insns, size_t insns_cnt, - const char *comment) +static int load_insns(const struct sock_addr_test *test, + const struct bpf_insn *insns, size_t insns_cnt) { struct bpf_load_program_attr load_attr; int ret; memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; - load_attr.expected_attach_type = attach_type; + load_attr.expected_attach_type = test->expected_attach_type; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = "GPL"; ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE); - if (ret < 0 && comment) { - log_err(">>> Loading %s program error.\n" - ">>> Output from verifier:\n%s\n-------\n", - comment, bpf_log_buf); + if (ret < 0 && test->expected_result != LOAD_REJECT) { + log_err(">>> Loading program error.\n" + ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); } return ret; @@ -119,8 +552,7 @@ static int load_insns(enum bpf_attach_type attach_type, * to count jumps properly. */ -static int bind4_prog_load(enum bpf_attach_type attach_type, - const char *comment) +static int bind4_prog_load(const struct sock_addr_test *test) { union { uint8_t u4_addr8[4]; @@ -186,12 +618,10 @@ static int bind4_prog_load(enum bpf_attach_type attach_type, BPF_EXIT_INSN(), }; - return load_insns(attach_type, insns, - sizeof(insns) / sizeof(struct bpf_insn), comment); + return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); } -static int bind6_prog_load(enum bpf_attach_type attach_type, - const char *comment) +static int bind6_prog_load(const struct sock_addr_test *test) { struct sockaddr_in6 addr6_rw; struct in6_addr ip6; @@ -254,13 +684,10 @@ static int bind6_prog_load(enum bpf_attach_type attach_type, BPF_EXIT_INSN(), }; - return load_insns(attach_type, insns, - sizeof(insns) / sizeof(struct bpf_insn), comment); + return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); } -static int connect_prog_load_path(const char *path, - enum bpf_attach_type attach_type, - const char *comment) +static int load_path(const struct sock_addr_test *test, const char *path) { struct bpf_prog_load_attr attr; struct bpf_object *obj; @@ -269,75 +696,218 @@ static int connect_prog_load_path(const char *path, memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); attr.file = path; attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; - attr.expected_attach_type = attach_type; + attr.expected_attach_type = test->expected_attach_type; if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { - if (comment) - log_err(">>> Loading %s program at %s error.\n", - comment, path); + if (test->expected_result != LOAD_REJECT) + log_err(">>> Loading program (%s) error.\n", path); return -1; } return prog_fd; } -static int connect4_prog_load(enum bpf_attach_type attach_type, - const char *comment) +static int connect4_prog_load(const struct sock_addr_test *test) { - return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment); + return load_path(test, CONNECT4_PROG_PATH); } -static int connect6_prog_load(enum bpf_attach_type attach_type, - const char *comment) +static int connect6_prog_load(const struct sock_addr_test *test) { - return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment); + return load_path(test, CONNECT6_PROG_PATH); } -static void print_ip_port(int sockfd, info_fn fn, const char *fmt) +static int sendmsg_deny_prog_load(const struct sock_addr_test *test) { - char addr_buf[INET_NTOP_BUF]; - struct sockaddr_storage addr; - struct sockaddr_in6 *addr6; - struct sockaddr_in *addr4; - socklen_t addr_len; - unsigned short port; - void *nip; - - addr_len = sizeof(struct sockaddr_storage); - memset(&addr, 0, addr_len); - - if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) { - if (addr.ss_family == AF_INET) { - addr4 = (struct sockaddr_in *)&addr; - nip = (void *)&addr4->sin_addr; - port = ntohs(addr4->sin_port); - } else if (addr.ss_family == AF_INET6) { - addr6 = (struct sockaddr_in6 *)&addr; - nip = (void *)&addr6->sin6_addr; - port = ntohs(addr6->sin6_port); - } else { - return; - } - const char *addr_str = - inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF); - printf(fmt, addr_str ? addr_str : "??", port); + struct bpf_insn insns[] = { + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); +} + +static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test) +{ + struct sockaddr_in dst4_rw_addr; + struct in_addr src4_rw_ip; + + if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) { + log_err("Invalid IPv4: %s", SRC4_REWRITE_IP); + return -1; + } + + if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT, + (struct sockaddr *)&dst4_rw_addr, + sizeof(dst4_rw_addr)) == -1) + return -1; + + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (sk.family == AF_INET && */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8), + + /* sk.type == SOCK_DGRAM) { */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, type)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6), + + /* msg_src_ip4 = src4_rw_ip */ + BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, + offsetof(struct bpf_sock_addr, msg_src_ip4)), + + /* user_ip4 = dst4_rw_addr.sin_addr */ + BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, + offsetof(struct bpf_sock_addr, user_ip4)), + + /* user_port = dst4_rw_addr.sin_port */ + BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, + offsetof(struct bpf_sock_addr, user_port)), + /* } */ + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); +} + +static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test) +{ + return load_path(test, SENDMSG4_PROG_PATH); +} + +static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test, + const char *rw_dst_ip) +{ + struct sockaddr_in6 dst6_rw_addr; + struct in6_addr src6_rw_ip; + + if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) { + log_err("Invalid IPv6: %s", SRC6_REWRITE_IP); + return -1; + } + + if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT, + (struct sockaddr *)&dst6_rw_addr, + sizeof(dst6_rw_addr)) == -1) + return -1; + + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (sk.family == AF_INET6) { */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18), + +#define STORE_IPV6_WORD_N(DST, SRC, N) \ + BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \ + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \ + offsetof(struct bpf_sock_addr, DST[N])) + +#define STORE_IPV6(DST, SRC) \ + STORE_IPV6_WORD_N(DST, SRC, 0), \ + STORE_IPV6_WORD_N(DST, SRC, 1), \ + STORE_IPV6_WORD_N(DST, SRC, 2), \ + STORE_IPV6_WORD_N(DST, SRC, 3) + + STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32), + STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32), + + /* user_port = dst6_rw_addr.sin6_port */ + BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, + offsetof(struct bpf_sock_addr, user_port)), + + /* } */ + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); +} + +static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test) +{ + return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP); +} + +static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test) +{ + return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP); +} + +static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test) +{ + return load_path(test, SENDMSG6_PROG_PATH); +} + +static int cmp_addr(const struct sockaddr_storage *addr1, + const struct sockaddr_storage *addr2, int cmp_port) +{ + const struct sockaddr_in *four1, *four2; + const struct sockaddr_in6 *six1, *six2; + + if (addr1->ss_family != addr2->ss_family) + return -1; + + if (addr1->ss_family == AF_INET) { + four1 = (const struct sockaddr_in *)addr1; + four2 = (const struct sockaddr_in *)addr2; + return !((four1->sin_port == four2->sin_port || !cmp_port) && + four1->sin_addr.s_addr == four2->sin_addr.s_addr); + } else if (addr1->ss_family == AF_INET6) { + six1 = (const struct sockaddr_in6 *)addr1; + six2 = (const struct sockaddr_in6 *)addr2; + return !((six1->sin6_port == six2->sin6_port || !cmp_port) && + !memcmp(&six1->sin6_addr, &six2->sin6_addr, + sizeof(struct in6_addr))); } + + return -1; } -static void print_local_ip_port(int sockfd, const char *fmt) +static int cmp_sock_addr(info_fn fn, int sock1, + const struct sockaddr_storage *addr2, int cmp_port) { - print_ip_port(sockfd, getsockname, fmt); + struct sockaddr_storage addr1; + socklen_t len1 = sizeof(addr1); + + memset(&addr1, 0, len1); + if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0) + return -1; + + return cmp_addr(&addr1, addr2, cmp_port); +} + +static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2) +{ + return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0); } -static void print_remote_ip_port(int sockfd, const char *fmt) +static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2) { - print_ip_port(sockfd, getpeername, fmt); + return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1); +} + +static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2) +{ + return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1); } static int start_server(int type, const struct sockaddr_storage *addr, socklen_t addr_len) { - int fd; fd = socket(addr->ss_family, type, 0); @@ -358,8 +928,6 @@ static int start_server(int type, const struct sockaddr_storage *addr, } } - print_local_ip_port(fd, "\t Actual: bind(%s, %d)\n"); - goto out; close_out: close(fd); @@ -372,19 +940,19 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr, socklen_t addr_len) { int domain; - int fd; + int fd = -1; domain = addr->ss_family; if (domain != AF_INET && domain != AF_INET6) { log_err("Unsupported address family"); - return -1; + goto err; } fd = socket(domain, type, 0); if (fd == -1) { - log_err("Failed to creating client socket"); - return -1; + log_err("Failed to create client socket"); + goto err; } if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) { @@ -392,162 +960,394 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr, goto err; } - print_remote_ip_port(fd, "\t Actual: connect(%s, %d)"); - print_local_ip_port(fd, " from (%s, %d)\n"); + goto out; +err: + close(fd); + fd = -1; +out: + return fd; +} + +int init_pktinfo(int domain, struct cmsghdr *cmsg) +{ + struct in6_pktinfo *pktinfo6; + struct in_pktinfo *pktinfo4; + + if (domain == AF_INET) { + cmsg->cmsg_level = SOL_IP; + cmsg->cmsg_type = IP_PKTINFO; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); + pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg); + memset(pktinfo4, 0, sizeof(struct in_pktinfo)); + if (inet_pton(domain, SRC4_IP, + (void *)&pktinfo4->ipi_spec_dst) != 1) + return -1; + } else if (domain == AF_INET6) { + cmsg->cmsg_level = SOL_IPV6; + cmsg->cmsg_type = IPV6_PKTINFO; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); + pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg); + memset(pktinfo6, 0, sizeof(struct in6_pktinfo)); + if (inet_pton(domain, SRC6_IP, + (void *)&pktinfo6->ipi6_addr) != 1) + return -1; + } else { + return -1; + } return 0; +} + +static int sendmsg_to_server(const struct sockaddr_storage *addr, + socklen_t addr_len, int set_cmsg, int *syscall_err) +{ + union { + char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))]; + struct cmsghdr align; + } control6; + union { + char buf[CMSG_SPACE(sizeof(struct in_pktinfo))]; + struct cmsghdr align; + } control4; + struct msghdr hdr; + struct iovec iov; + char data = 'a'; + int domain; + int fd = -1; + + domain = addr->ss_family; + + if (domain != AF_INET && domain != AF_INET6) { + log_err("Unsupported address family"); + goto err; + } + + fd = socket(domain, SOCK_DGRAM, 0); + if (fd == -1) { + log_err("Failed to create client socket"); + goto err; + } + + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_name = (void *)addr; + hdr.msg_namelen = addr_len; + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + + if (set_cmsg) { + if (domain == AF_INET) { + hdr.msg_control = &control4; + hdr.msg_controllen = sizeof(control4.buf); + } else if (domain == AF_INET6) { + hdr.msg_control = &control6; + hdr.msg_controllen = sizeof(control6.buf); + } + if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) { + log_err("Fail to init pktinfo"); + goto err; + } + } + + if (sendmsg(fd, &hdr, 0) != sizeof(data)) { + log_err("Fail to send message to server"); + *syscall_err = errno; + goto err; + } + + goto out; err: close(fd); - return -1; + fd = -1; +out: + return fd; } -static void print_test_case_num(int domain, int type) +static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr) { - static int test_num; - - printf("Test case #%d (%s/%s):\n", ++test_num, - (domain == AF_INET ? "IPv4" : - domain == AF_INET6 ? "IPv6" : - "unknown_domain"), - (type == SOCK_STREAM ? "TCP" : - type == SOCK_DGRAM ? "UDP" : - "unknown_type")); + struct timeval tv; + struct msghdr hdr; + struct iovec iov; + char data[64]; + fd_set rfds; + + FD_ZERO(&rfds); + FD_SET(sockfd, &rfds); + + tv.tv_sec = 2; + tv.tv_usec = 0; + + if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 || + !FD_ISSET(sockfd, &rfds)) + return -1; + + memset(&iov, 0, sizeof(iov)); + iov.iov_base = data; + iov.iov_len = sizeof(data); + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_name = src_addr; + hdr.msg_namelen = sizeof(struct sockaddr_storage); + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + + return recvmsg(sockfd, &hdr, 0); } -static int run_test_case(int domain, int type, const char *ip, - unsigned short port) +static int init_addrs(const struct sock_addr_test *test, + struct sockaddr_storage *requested_addr, + struct sockaddr_storage *expected_addr, + struct sockaddr_storage *expected_src_addr) { - struct sockaddr_storage addr; - socklen_t addr_len = sizeof(addr); + socklen_t addr_len = sizeof(struct sockaddr_storage); + + if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port, + (struct sockaddr *)expected_addr, addr_len) == -1) + goto err; + + if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port, + (struct sockaddr *)requested_addr, addr_len) == -1) + goto err; + + if (test->expected_src_ip && + mk_sockaddr(test->domain, test->expected_src_ip, 0, + (struct sockaddr *)expected_src_addr, addr_len) == -1) + goto err; + + return 0; +err: + return -1; +} + +static int run_bind_test_case(const struct sock_addr_test *test) +{ + socklen_t addr_len = sizeof(struct sockaddr_storage); + struct sockaddr_storage requested_addr; + struct sockaddr_storage expected_addr; + int clientfd = -1; int servfd = -1; int err = 0; - print_test_case_num(domain, type); - - if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr, - addr_len) == -1) - return -1; + if (init_addrs(test, &requested_addr, &expected_addr, NULL)) + goto err; - printf("\tRequested: bind(%s, %d) ..\n", ip, port); - servfd = start_server(type, &addr, addr_len); + servfd = start_server(test->type, &requested_addr, addr_len); if (servfd == -1) goto err; - printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port); - if (connect_to_server(type, &addr, addr_len)) + if (cmp_local_addr(servfd, &expected_addr)) + goto err; + + /* Try to connect to server just in case */ + clientfd = connect_to_server(test->type, &expected_addr, addr_len); + if (clientfd == -1) goto err; goto out; err: err = -1; out: + close(clientfd); close(servfd); return err; } -static void close_progs_fds(struct program *progs, size_t prog_cnt) +static int run_connect_test_case(const struct sock_addr_test *test) { - size_t i; + socklen_t addr_len = sizeof(struct sockaddr_storage); + struct sockaddr_storage expected_src_addr; + struct sockaddr_storage requested_addr; + struct sockaddr_storage expected_addr; + int clientfd = -1; + int servfd = -1; + int err = 0; - for (i = 0; i < prog_cnt; ++i) { - close(progs[i].fd); - progs[i].fd = -1; - } + if (init_addrs(test, &requested_addr, &expected_addr, + &expected_src_addr)) + goto err; + + /* Prepare server to connect to */ + servfd = start_server(test->type, &expected_addr, addr_len); + if (servfd == -1) + goto err; + + clientfd = connect_to_server(test->type, &requested_addr, addr_len); + if (clientfd == -1) + goto err; + + /* Make sure src and dst addrs were overridden properly */ + if (cmp_peer_addr(clientfd, &expected_addr)) + goto err; + + if (cmp_local_ip(clientfd, &expected_src_addr)) + goto err; + + goto out; +err: + err = -1; +out: + close(clientfd); + close(servfd); + return err; } -static int load_and_attach_progs(int cgfd, struct program *progs, - size_t prog_cnt) +static int run_sendmsg_test_case(const struct sock_addr_test *test) { - size_t i; - - for (i = 0; i < prog_cnt; ++i) { - printf("Load %s with invalid type (can pollute stderr) ", - progs[i].name); - fflush(stdout); - progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL); - if (progs[i].fd != -1) { - log_err("Load with invalid type accepted for %s", - progs[i].name); - goto err; - } - printf("... REJECTED\n"); + socklen_t addr_len = sizeof(struct sockaddr_storage); + struct sockaddr_storage expected_src_addr; + struct sockaddr_storage requested_addr; + struct sockaddr_storage expected_addr; + struct sockaddr_storage real_src_addr; + int clientfd = -1; + int servfd = -1; + int set_cmsg; + int err = 0; + + if (test->type != SOCK_DGRAM) + goto err; - printf("Load %s with valid type", progs[i].name); - progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name); - if (progs[i].fd == -1) { - log_err("Failed to load program %s", progs[i].name); + if (init_addrs(test, &requested_addr, &expected_addr, + &expected_src_addr)) + goto err; + + /* Prepare server to sendmsg to */ + servfd = start_server(test->type, &expected_addr, addr_len); + if (servfd == -1) + goto err; + + for (set_cmsg = 0; set_cmsg <= 1; ++set_cmsg) { + if (clientfd >= 0) + close(clientfd); + + clientfd = sendmsg_to_server(&requested_addr, addr_len, + set_cmsg, &err); + if (err) + goto out; + else if (clientfd == -1) goto err; - } - printf(" ... OK\n"); - printf("Attach %s with invalid type", progs[i].name); - if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type, - BPF_F_ALLOW_OVERRIDE) != -1) { - log_err("Attach with invalid type accepted for %s", - progs[i].name); + /* Try to receive message on server instead of using + * getpeername(2) on client socket, to check that client's + * destination address was rewritten properly, since + * getpeername(2) doesn't work with unconnected datagram + * sockets. + * + * Get source address from recvmsg(2) as well to make sure + * source was rewritten properly: getsockname(2) can't be used + * since socket is unconnected and source defined for one + * specific packet may differ from the one used by default and + * returned by getsockname(2). + */ + if (recvmsg_from_client(servfd, &real_src_addr) == -1) goto err; - } - printf(" ... REJECTED\n"); - printf("Attach %s with valid type", progs[i].name); - if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type, - BPF_F_ALLOW_OVERRIDE) == -1) { - log_err("Failed to attach program %s", progs[i].name); + if (cmp_addr(&real_src_addr, &expected_src_addr, /*cmp_port*/0)) goto err; - } - printf(" ... OK\n"); } - return 0; + goto out; err: - close_progs_fds(progs, prog_cnt); - return -1; + err = -1; +out: + close(clientfd); + close(servfd); + return err; } -static int run_domain_test(int domain, int cgfd, struct program *progs, - size_t prog_cnt, const char *ip, unsigned short port) +static int run_test_case(int cgfd, const struct sock_addr_test *test) { + int progfd = -1; int err = 0; - if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1) + printf("Test case: %s .. ", test->descr); + + progfd = test->loadfn(test); + if (test->expected_result == LOAD_REJECT && progfd < 0) + goto out; + else if (test->expected_result == LOAD_REJECT || progfd < 0) + goto err; + + err = bpf_prog_attach(progfd, cgfd, test->attach_type, + BPF_F_ALLOW_OVERRIDE); + if (test->expected_result == ATTACH_REJECT && err) { + err = 0; /* error was expected, reset it */ + goto out; + } else if (test->expected_result == ATTACH_REJECT || err) { goto err; + } - if (run_test_case(domain, SOCK_STREAM, ip, port) == -1) + switch (test->attach_type) { + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET6_BIND: + err = run_bind_test_case(test); + break; + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + err = run_connect_test_case(test); + break; + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: + err = run_sendmsg_test_case(test); + break; + default: goto err; + } + + if (test->expected_result == SYSCALL_EPERM && err == EPERM) { + err = 0; /* error was expected, reset it */ + goto out; + } + + if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) { + err = 0; /* error was expected, reset it */ + goto out; + } - if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1) + if (err || test->expected_result != SUCCESS) goto err; goto out; err: err = -1; out: - close_progs_fds(progs, prog_cnt); + /* Detaching w/o checking return code: best effort attempt. */ + if (progfd != -1) + bpf_prog_detach(cgfd, test->attach_type); + close(progfd); + printf("[%s]\n", err ? "FAIL" : "PASS"); return err; } -static int run_test(void) +static int run_tests(int cgfd) +{ + int passes = 0; + int fails = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (run_test_case(cgfd, &tests[i])) + ++fails; + else + ++passes; + } + printf("Summary: %d PASSED, %d FAILED\n", passes, fails); + return fails ? -1 : 0; +} + +int main(int argc, char **argv) { - size_t inet6_prog_cnt; - size_t inet_prog_cnt; int cgfd = -1; int err = 0; - struct program inet6_progs[] = { - {BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6", - BPF_CGROUP_INET4_BIND}, - {BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6", - BPF_CGROUP_INET4_CONNECT}, - }; - inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program); - - struct program inet_progs[] = { - {BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4", - BPF_CGROUP_INET6_BIND}, - {BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4", - BPF_CGROUP_INET6_CONNECT}, - }; - inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program); + if (argc < 2) { + fprintf(stderr, + "%s has to be run via %s.sh. Skip direct run.\n", + argv[0], argv[0]); + exit(err); + } if (setup_cgroup_environment()) goto err; @@ -559,12 +1359,7 @@ static int run_test(void) if (join_cgroup(CG_PATH)) goto err; - if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP, - SERV4_PORT) == -1) - goto err; - - if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt, - SERV6_IP, SERV6_PORT) == -1) + if (run_tests(cgfd)) goto err; goto out; @@ -573,17 +1368,5 @@ err: out: close(cgfd); cleanup_cgroup_environment(); - printf(err ? "### FAIL\n" : "### SUCCESS\n"); return err; } - -int main(int argc, char **argv) -{ - if (argc < 2) { - fprintf(stderr, - "%s has to be run via %s.sh. Skip direct run.\n", - argv[0], argv[0]); - exit(0); - } - return run_test(); -} diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/test_sockhash_kern.c new file mode 100644 index 000000000000..e6755916442a --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockhash_kern.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io +#undef SOCKMAP +#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH +#include "./test_sockmap_kern.h" diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c new file mode 100644 index 000000000000..9e78df207919 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -0,0 +1,1518 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <stdbool.h> +#include <signal.h> +#include <fcntl.h> +#include <sys/wait.h> +#include <time.h> +#include <sched.h> + +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/sendfile.h> + +#include <linux/netlink.h> +#include <linux/socket.h> +#include <linux/sock_diag.h> +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <libgen.h> + +#include <getopt.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_util.h" +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" + +int running; +static void running_handler(int a); + +/* randomly selected ports for testing on lo */ +#define S1_PORT 10000 +#define S2_PORT 10001 + +#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o" +#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o" +#define CG_PATH "/sockmap" + +/* global sockets */ +int s1, s2, c1, c2, p1, p2; +int test_cnt; +int passed; +int failed; +int map_fd[8]; +struct bpf_map *maps[8]; +int prog_fd[11]; + +int txmsg_pass; +int txmsg_noisy; +int txmsg_redir; +int txmsg_redir_noisy; +int txmsg_drop; +int txmsg_apply; +int txmsg_cork; +int txmsg_start; +int txmsg_end; +int txmsg_ingress; +int txmsg_skb; + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"cgroup", required_argument, NULL, 'c' }, + {"rate", required_argument, NULL, 'r' }, + {"verbose", no_argument, NULL, 'v' }, + {"iov_count", required_argument, NULL, 'i' }, + {"length", required_argument, NULL, 'l' }, + {"test", required_argument, NULL, 't' }, + {"data_test", no_argument, NULL, 'd' }, + {"txmsg", no_argument, &txmsg_pass, 1 }, + {"txmsg_noisy", no_argument, &txmsg_noisy, 1 }, + {"txmsg_redir", no_argument, &txmsg_redir, 1 }, + {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1}, + {"txmsg_drop", no_argument, &txmsg_drop, 1 }, + {"txmsg_apply", required_argument, NULL, 'a'}, + {"txmsg_cork", required_argument, NULL, 'k'}, + {"txmsg_start", required_argument, NULL, 's'}, + {"txmsg_end", required_argument, NULL, 'e'}, + {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, + {"txmsg_skb", no_argument, &txmsg_skb, 1 }, + {0, 0, NULL, 0 } +}; + +static void usage(char *argv[]) +{ + int i; + + printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]); + printf(" options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-12s", long_options[i].name); + if (long_options[i].flag != NULL) + printf(" flag (internal value:%d)\n", + *long_options[i].flag); + else + printf(" -%c\n", long_options[i].val); + } + printf("\n"); +} + +static int sockmap_init_sockets(int verbose) +{ + int i, err, one = 1; + struct sockaddr_in addr; + int *fds[4] = {&s1, &s2, &c1, &c2}; + + s1 = s2 = p1 = p2 = c1 = c2 = 0; + + /* Init sockets */ + for (i = 0; i < 4; i++) { + *fds[i] = socket(AF_INET, SOCK_STREAM, 0); + if (*fds[i] < 0) { + perror("socket s1 failed()"); + return errno; + } + } + + /* Allow reuse */ + for (i = 0; i < 2; i++) { + err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR, + (char *)&one, sizeof(one)); + if (err) { + perror("setsockopt failed()"); + return errno; + } + } + + /* Non-blocking sockets */ + for (i = 0; i < 2; i++) { + err = ioctl(*fds[i], FIONBIO, (char *)&one); + if (err < 0) { + perror("ioctl s1 failed()"); + return errno; + } + } + + /* Bind server sockets */ + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + + addr.sin_port = htons(S1_PORT); + err = bind(s1, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0) { + perror("bind s1 failed()\n"); + return errno; + } + + addr.sin_port = htons(S2_PORT); + err = bind(s2, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0) { + perror("bind s2 failed()\n"); + return errno; + } + + /* Listen server sockets */ + addr.sin_port = htons(S1_PORT); + err = listen(s1, 32); + if (err < 0) { + perror("listen s1 failed()\n"); + return errno; + } + + addr.sin_port = htons(S2_PORT); + err = listen(s2, 32); + if (err < 0) { + perror("listen s1 failed()\n"); + return errno; + } + + /* Initiate Connect */ + addr.sin_port = htons(S1_PORT); + err = connect(c1, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0 && errno != EINPROGRESS) { + perror("connect c1 failed()\n"); + return errno; + } + + addr.sin_port = htons(S2_PORT); + err = connect(c2, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0 && errno != EINPROGRESS) { + perror("connect c2 failed()\n"); + return errno; + } else if (err < 0) { + err = 0; + } + + /* Accept Connecrtions */ + p1 = accept(s1, NULL, NULL); + if (p1 < 0) { + perror("accept s1 failed()\n"); + return errno; + } + + p2 = accept(s2, NULL, NULL); + if (p2 < 0) { + perror("accept s1 failed()\n"); + return errno; + } + + if (verbose) { + printf("connected sockets: c1 <-> p1, c2 <-> p2\n"); + printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n", + c1, s1, c2, s2); + } + return 0; +} + +struct msg_stats { + size_t bytes_sent; + size_t bytes_recvd; + struct timespec start; + struct timespec end; +}; + +struct sockmap_options { + int verbose; + bool base; + bool sendpage; + bool data_test; + bool drop_expected; + int iov_count; + int iov_length; + int rate; +}; + +static int msg_loop_sendpage(int fd, int iov_length, int cnt, + struct msg_stats *s, + struct sockmap_options *opt) +{ + bool drop = opt->drop_expected; + unsigned char k = 0; + FILE *file; + int i, fp; + + file = fopen(".sendpage_tst.tmp", "w+"); + for (i = 0; i < iov_length * cnt; i++, k++) + fwrite(&k, sizeof(char), 1, file); + fflush(file); + fseek(file, 0, SEEK_SET); + fclose(file); + + fp = open(".sendpage_tst.tmp", O_RDONLY); + clock_gettime(CLOCK_MONOTONIC, &s->start); + for (i = 0; i < cnt; i++) { + int sent = sendfile(fd, fp, NULL, iov_length); + + if (!drop && sent < 0) { + perror("send loop error:"); + close(fp); + return sent; + } else if (drop && sent >= 0) { + printf("sendpage loop error expected: %i\n", sent); + close(fp); + return -EIO; + } + + if (sent > 0) + s->bytes_sent += sent; + } + clock_gettime(CLOCK_MONOTONIC, &s->end); + close(fp); + return 0; +} + +static int msg_loop(int fd, int iov_count, int iov_length, int cnt, + struct msg_stats *s, bool tx, + struct sockmap_options *opt) +{ + struct msghdr msg = {0}; + int err, i, flags = MSG_NOSIGNAL; + struct iovec *iov; + unsigned char k; + bool data_test = opt->data_test; + bool drop = opt->drop_expected; + + iov = calloc(iov_count, sizeof(struct iovec)); + if (!iov) + return errno; + + k = 0; + for (i = 0; i < iov_count; i++) { + unsigned char *d = calloc(iov_length, sizeof(char)); + + if (!d) { + fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count); + goto out_errno; + } + iov[i].iov_base = d; + iov[i].iov_len = iov_length; + + if (data_test && tx) { + int j; + + for (j = 0; j < iov_length; j++) + d[j] = k++; + } + } + + msg.msg_iov = iov; + msg.msg_iovlen = iov_count; + k = 0; + + if (tx) { + clock_gettime(CLOCK_MONOTONIC, &s->start); + for (i = 0; i < cnt; i++) { + int sent = sendmsg(fd, &msg, flags); + + if (!drop && sent < 0) { + perror("send loop error:"); + goto out_errno; + } else if (drop && sent >= 0) { + printf("send loop error expected: %i\n", sent); + errno = -EIO; + goto out_errno; + } + if (sent > 0) + s->bytes_sent += sent; + } + clock_gettime(CLOCK_MONOTONIC, &s->end); + } else { + int slct, recv, max_fd = fd; + int fd_flags = O_NONBLOCK; + struct timeval timeout; + float total_bytes; + int bytes_cnt = 0; + int chunk_sz; + fd_set w; + + if (opt->sendpage) + chunk_sz = iov_length * cnt; + else + chunk_sz = iov_length * iov_count; + + fcntl(fd, fd_flags); + total_bytes = (float)iov_count * (float)iov_length * (float)cnt; + err = clock_gettime(CLOCK_MONOTONIC, &s->start); + if (err < 0) + perror("recv start time: "); + while (s->bytes_recvd < total_bytes) { + if (txmsg_cork) { + timeout.tv_sec = 0; + timeout.tv_usec = 1000; + } else { + timeout.tv_sec = 1; + timeout.tv_usec = 0; + } + + /* FD sets */ + FD_ZERO(&w); + FD_SET(fd, &w); + + slct = select(max_fd + 1, &w, NULL, NULL, &timeout); + if (slct == -1) { + perror("select()"); + clock_gettime(CLOCK_MONOTONIC, &s->end); + goto out_errno; + } else if (!slct) { + if (opt->verbose) + fprintf(stderr, "unexpected timeout\n"); + errno = -EIO; + clock_gettime(CLOCK_MONOTONIC, &s->end); + goto out_errno; + } + + recv = recvmsg(fd, &msg, flags); + if (recv < 0) { + if (errno != EWOULDBLOCK) { + clock_gettime(CLOCK_MONOTONIC, &s->end); + perror("recv failed()\n"); + goto out_errno; + } + } + + s->bytes_recvd += recv; + + if (data_test) { + int j; + + for (i = 0; i < msg.msg_iovlen; i++) { + unsigned char *d = iov[i].iov_base; + + for (j = 0; + j < iov[i].iov_len && recv; j++) { + if (d[j] != k++) { + errno = -EIO; + fprintf(stderr, + "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", + i, j, d[j], k - 1, d[j+1], k); + goto out_errno; + } + bytes_cnt++; + if (bytes_cnt == chunk_sz) { + k = 0; + bytes_cnt = 0; + } + recv--; + } + } + } + } + clock_gettime(CLOCK_MONOTONIC, &s->end); + } + + for (i = 0; i < iov_count; i++) + free(iov[i].iov_base); + free(iov); + return 0; +out_errno: + for (i = 0; i < iov_count; i++) + free(iov[i].iov_base); + free(iov); + return errno; +} + +static float giga = 1000000000; + +static inline float sentBps(struct msg_stats s) +{ + return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec); +} + +static inline float recvdBps(struct msg_stats s) +{ + return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec); +} + +static int sendmsg_test(struct sockmap_options *opt) +{ + float sent_Bps = 0, recvd_Bps = 0; + int rx_fd, txpid, rxpid, err = 0; + struct msg_stats s = {0}; + int iov_count = opt->iov_count; + int iov_buf = opt->iov_length; + int rx_status, tx_status; + int cnt = opt->rate; + + errno = 0; + + if (opt->base) + rx_fd = p1; + else + rx_fd = p2; + + rxpid = fork(); + if (rxpid == 0) { + if (opt->drop_expected) + exit(0); + + if (opt->sendpage) + iov_count = 1; + err = msg_loop(rx_fd, iov_count, iov_buf, + cnt, &s, false, opt); + if (err && opt->verbose) + fprintf(stderr, + "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", + iov_count, iov_buf, cnt, err); + shutdown(p2, SHUT_RDWR); + shutdown(p1, SHUT_RDWR); + if (s.end.tv_sec - s.start.tv_sec) { + sent_Bps = sentBps(s); + recvd_Bps = recvdBps(s); + } + if (opt->verbose) + fprintf(stdout, + "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", + s.bytes_sent, sent_Bps, sent_Bps/giga, + s.bytes_recvd, recvd_Bps, recvd_Bps/giga); + if (err && txmsg_cork) + err = 0; + exit(err ? 1 : 0); + } else if (rxpid == -1) { + perror("msg_loop_rx: "); + return errno; + } + + txpid = fork(); + if (txpid == 0) { + if (opt->sendpage) + err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt); + else + err = msg_loop(c1, iov_count, iov_buf, + cnt, &s, true, opt); + + if (err) + fprintf(stderr, + "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n", + iov_count, iov_buf, cnt, err); + shutdown(c1, SHUT_RDWR); + if (s.end.tv_sec - s.start.tv_sec) { + sent_Bps = sentBps(s); + recvd_Bps = recvdBps(s); + } + if (opt->verbose) + fprintf(stdout, + "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n", + s.bytes_sent, sent_Bps, sent_Bps/giga, + s.bytes_recvd, recvd_Bps, recvd_Bps/giga); + exit(err ? 1 : 0); + } else if (txpid == -1) { + perror("msg_loop_tx: "); + return errno; + } + + assert(waitpid(rxpid, &rx_status, 0) == rxpid); + assert(waitpid(txpid, &tx_status, 0) == txpid); + if (WIFEXITED(rx_status)) { + err = WEXITSTATUS(rx_status); + if (err) { + fprintf(stderr, "rx thread exited with err %d. ", err); + goto out; + } + } + if (WIFEXITED(tx_status)) { + err = WEXITSTATUS(tx_status); + if (err) + fprintf(stderr, "tx thread exited with err %d. ", err); + } +out: + return err; +} + +static int forever_ping_pong(int rate, struct sockmap_options *opt) +{ + struct timeval timeout; + char buf[1024] = {0}; + int sc; + + timeout.tv_sec = 10; + timeout.tv_usec = 0; + + /* Ping/Pong data from client to server */ + sc = send(c1, buf, sizeof(buf), 0); + if (sc < 0) { + perror("send failed()\n"); + return sc; + } + + do { + int s, rc, i, max_fd = p2; + fd_set w; + + /* FD sets */ + FD_ZERO(&w); + FD_SET(c1, &w); + FD_SET(c2, &w); + FD_SET(p1, &w); + FD_SET(p2, &w); + + s = select(max_fd + 1, &w, NULL, NULL, &timeout); + if (s == -1) { + perror("select()"); + break; + } else if (!s) { + fprintf(stderr, "unexpected timeout\n"); + break; + } + + for (i = 0; i <= max_fd && s > 0; ++i) { + if (!FD_ISSET(i, &w)) + continue; + + s--; + + rc = recv(i, buf, sizeof(buf), 0); + if (rc < 0) { + if (errno != EWOULDBLOCK) { + perror("recv failed()\n"); + return rc; + } + } + + if (rc == 0) { + close(i); + break; + } + + sc = send(i, buf, rc, 0); + if (sc < 0) { + perror("send failed()\n"); + return sc; + } + } + + if (rate) + sleep(rate); + + if (opt->verbose) { + printf("."); + fflush(stdout); + + } + } while (running); + + return 0; +} + +enum { + PING_PONG, + SENDMSG, + BASE, + BASE_SENDPAGE, + SENDPAGE, +}; + +static int run_options(struct sockmap_options *options, int cg_fd, int test) +{ + int i, key, next_key, err, tx_prog_fd = -1, zero = 0; + + /* If base test skip BPF setup */ + if (test == BASE || test == BASE_SENDPAGE) + goto run; + + /* Attach programs to sockmap */ + err = bpf_prog_attach(prog_fd[0], map_fd[0], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[0], err, strerror(errno)); + return err; + } + + err = bpf_prog_attach(prog_fd[1], map_fd[0], + BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", + err, strerror(errno)); + return err; + } + + /* Attach to cgroups */ + err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", + err, strerror(errno)); + return err; + } + +run: + err = sockmap_init_sockets(options->verbose); + if (err) { + fprintf(stderr, "ERROR: test socket failed: %d\n", err); + goto out; + } + + /* Attach txmsg program to sockmap */ + if (txmsg_pass) + tx_prog_fd = prog_fd[3]; + else if (txmsg_noisy) + tx_prog_fd = prog_fd[4]; + else if (txmsg_redir) + tx_prog_fd = prog_fd[5]; + else if (txmsg_redir_noisy) + tx_prog_fd = prog_fd[6]; + else if (txmsg_drop) + tx_prog_fd = prog_fd[9]; + /* apply and cork must be last */ + else if (txmsg_apply) + tx_prog_fd = prog_fd[7]; + else if (txmsg_cork) + tx_prog_fd = prog_fd[8]; + else + tx_prog_fd = 0; + + if (tx_prog_fd) { + int redir_fd, i = 0; + + err = bpf_prog_attach(tx_prog_fd, + map_fd[1], BPF_SK_MSG_VERDICT, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (txmsg): %d (%s)\n", + err, strerror(errno)); + goto out; + } + + err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg): %d (%s\n", + err, strerror(errno)); + goto out; + } + + if (txmsg_redir || txmsg_redir_noisy) + redir_fd = c2; + else + redir_fd = c1; + + err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg): %d (%s\n", + err, strerror(errno)); + goto out; + } + + if (txmsg_apply) { + err = bpf_map_update_elem(map_fd[3], + &i, &txmsg_apply, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n", + err, strerror(errno)); + goto out; + } + } + + if (txmsg_cork) { + err = bpf_map_update_elem(map_fd[4], + &i, &txmsg_cork, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n", + err, strerror(errno)); + goto out; + } + } + + if (txmsg_start) { + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n", + err, strerror(errno)); + goto out; + } + } + + if (txmsg_end) { + i = 1; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_end, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n", + err, strerror(errno)); + goto out; + } + } + + if (txmsg_ingress) { + int in = BPF_F_INGRESS; + + i = 0; + err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n", + err, strerror(errno)); + } + i = 1; + err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n", + err, strerror(errno)); + } + err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n", + err, strerror(errno)); + } + + i = 2; + err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n", + err, strerror(errno)); + } + } + + if (txmsg_skb) { + int skb_fd = (test == SENDMSG || test == SENDPAGE) ? + p2 : p1; + int ingress = BPF_F_INGRESS; + + i = 0; + err = bpf_map_update_elem(map_fd[7], + &i, &ingress, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n", + err, strerror(errno)); + } + + i = 3; + err = bpf_map_update_elem(map_fd[0], + &i, &skb_fd, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n", + err, strerror(errno)); + } + } + } + + if (txmsg_drop) + options->drop_expected = true; + + if (test == PING_PONG) + err = forever_ping_pong(options->rate, options); + else if (test == SENDMSG) { + options->base = false; + options->sendpage = false; + err = sendmsg_test(options); + } else if (test == SENDPAGE) { + options->base = false; + options->sendpage = true; + err = sendmsg_test(options); + } else if (test == BASE) { + options->base = true; + options->sendpage = false; + err = sendmsg_test(options); + } else if (test == BASE_SENDPAGE) { + options->base = true; + options->sendpage = true; + err = sendmsg_test(options); + } else + fprintf(stderr, "unknown test\n"); +out: + /* Detatch and zero all the maps */ + bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS); + bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER); + bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT); + if (tx_prog_fd >= 0) + bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); + + for (i = 0; i < 8; i++) { + key = next_key = 0; + bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); + while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) { + bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); + key = next_key; + } + } + + close(s1); + close(s2); + close(p1); + close(p2); + close(c1); + close(c2); + return err; +} + +static char *test_to_str(int test) +{ + switch (test) { + case SENDMSG: + return "sendmsg"; + case SENDPAGE: + return "sendpage"; + } + return "unknown"; +} + +#define OPTSTRING 60 +static void test_options(char *options) +{ + char tstr[OPTSTRING]; + + memset(options, 0, OPTSTRING); + + if (txmsg_pass) + strncat(options, "pass,", OPTSTRING); + if (txmsg_noisy) + strncat(options, "pass_noisy,", OPTSTRING); + if (txmsg_redir) + strncat(options, "redir,", OPTSTRING); + if (txmsg_redir_noisy) + strncat(options, "redir_noisy,", OPTSTRING); + if (txmsg_drop) + strncat(options, "drop,", OPTSTRING); + if (txmsg_apply) { + snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply); + strncat(options, tstr, OPTSTRING); + } + if (txmsg_cork) { + snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork); + strncat(options, tstr, OPTSTRING); + } + if (txmsg_start) { + snprintf(tstr, OPTSTRING, "start %d,", txmsg_start); + strncat(options, tstr, OPTSTRING); + } + if (txmsg_end) { + snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); + strncat(options, tstr, OPTSTRING); + } + if (txmsg_ingress) + strncat(options, "ingress,", OPTSTRING); + if (txmsg_skb) + strncat(options, "skb,", OPTSTRING); +} + +static int __test_exec(int cgrp, int test, struct sockmap_options *opt) +{ + char *options = calloc(OPTSTRING, sizeof(char)); + int err; + + if (test == SENDPAGE) + opt->sendpage = true; + else + opt->sendpage = false; + + if (txmsg_drop) + opt->drop_expected = true; + else + opt->drop_expected = false; + + test_options(options); + + fprintf(stdout, + "[TEST %i]: (%i, %i, %i, %s, %s): ", + test_cnt, opt->rate, opt->iov_count, opt->iov_length, + test_to_str(test), options); + fflush(stdout); + err = run_options(opt, cgrp, test); + fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED"); + test_cnt++; + !err ? passed++ : failed++; + free(options); + return err; +} + +static int test_exec(int cgrp, struct sockmap_options *opt) +{ + int err = __test_exec(cgrp, SENDMSG, opt); + + if (err) + goto out; + + err = __test_exec(cgrp, SENDPAGE, opt); +out: + return err; +} + +static int test_loop(int cgrp) +{ + struct sockmap_options opt; + + int err, i, l, r; + + opt.verbose = 0; + opt.base = false; + opt.sendpage = false; + opt.data_test = false; + opt.drop_expected = false; + opt.iov_count = 0; + opt.iov_length = 0; + opt.rate = 0; + + r = 1; + for (i = 1; i < 100; i += 33) { + for (l = 1; l < 100; l += 33) { + opt.rate = r; + opt.iov_count = i; + opt.iov_length = l; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + } + sched_yield(); +out: + return err; +} + +static int test_txmsg(int cgrp) +{ + int err; + + txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; + txmsg_apply = txmsg_cork = 0; + txmsg_ingress = txmsg_skb = 0; + + txmsg_pass = 1; + err = test_loop(cgrp); + txmsg_pass = 0; + if (err) + goto out; + + txmsg_redir = 1; + err = test_loop(cgrp); + txmsg_redir = 0; + if (err) + goto out; + + txmsg_drop = 1; + err = test_loop(cgrp); + txmsg_drop = 0; + if (err) + goto out; + + txmsg_redir = 1; + txmsg_ingress = 1; + err = test_loop(cgrp); + txmsg_redir = 0; + txmsg_ingress = 0; + if (err) + goto out; +out: + txmsg_pass = 0; + txmsg_redir = 0; + txmsg_drop = 0; + return err; +} + +static int test_send(struct sockmap_options *opt, int cgrp) +{ + int err; + + opt->iov_length = 1; + opt->iov_count = 1; + opt->rate = 1; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 1; + opt->iov_count = 1024; + opt->rate = 1; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 1024; + opt->iov_count = 1; + opt->rate = 1; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 1; + opt->iov_count = 1; + opt->rate = 512; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 256; + opt->iov_count = 1024; + opt->rate = 2; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->rate = 100; + opt->iov_count = 1; + opt->iov_length = 5; + err = test_exec(cgrp, opt); + if (err) + goto out; +out: + sched_yield(); + return err; +} + +static int test_mixed(int cgrp) +{ + struct sockmap_options opt = {0}; + int err; + + txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; + txmsg_apply = txmsg_cork = 0; + txmsg_start = txmsg_end = 0; + /* Test small and large iov_count values with pass/redir/apply/cork */ + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 0; + txmsg_cork = 1; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1; + txmsg_cork = 1; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1024; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 0; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1024; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_cork = 4096; + txmsg_apply = 4096; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 0; + txmsg_cork = 1; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1024; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 0; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1024; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_cork = 4096; + txmsg_apply = 4096; + err = test_send(&opt, cgrp); + if (err) + goto out; +out: + return err; +} + +static int test_start_end(int cgrp) +{ + struct sockmap_options opt = {0}; + int err, i; + + /* Test basic start/end with lots of iov_count and iov_lengths */ + txmsg_start = 1; + txmsg_end = 2; + err = test_txmsg(cgrp); + if (err) + goto out; + + /* Test start/end with cork */ + opt.rate = 16; + opt.iov_count = 1; + opt.iov_length = 100; + txmsg_cork = 1600; + + for (i = 99; i <= 1600; i += 500) { + txmsg_start = 0; + txmsg_end = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + + /* Test start/end with cork but pull data in middle */ + for (i = 199; i <= 1600; i += 500) { + txmsg_start = 100; + txmsg_end = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + + /* Test start/end with cork pulling last sg entry */ + txmsg_start = 1500; + txmsg_end = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end pull of single byte in last page */ + txmsg_start = 1111; + txmsg_end = 1112; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end with end < start */ + txmsg_start = 1111; + txmsg_end = 0; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end with end > data */ + txmsg_start = 0; + txmsg_end = 1601; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end with start > data */ + txmsg_start = 1601; + txmsg_end = 1600; + err = test_exec(cgrp, &opt); + +out: + txmsg_start = 0; + txmsg_end = 0; + sched_yield(); + return err; +} + +char *map_names[] = { + "sock_map", + "sock_map_txmsg", + "sock_map_redir", + "sock_apply_bytes", + "sock_cork_bytes", + "sock_pull_bytes", + "sock_redir_flags", + "sock_skb_opts", +}; + +int prog_attach_type[] = { + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_SOCK_OPS, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, +}; + +int prog_type[] = { + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, +}; + +static int populate_progs(char *bpf_file) +{ + struct bpf_program *prog; + struct bpf_object *obj; + int i = 0; + long err; + + obj = bpf_object__open(bpf_file); + err = libbpf_get_error(obj); + if (err) { + char err_buf[256]; + + libbpf_strerror(err, err_buf, sizeof(err_buf)); + printf("Unable to load eBPF objects in file '%s' : %s\n", + bpf_file, err_buf); + return -1; + } + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_type(prog, prog_type[i]); + bpf_program__set_expected_attach_type(prog, + prog_attach_type[i]); + i++; + } + + i = bpf_object__load(obj); + i = 0; + bpf_object__for_each_program(prog, obj) { + prog_fd[i] = bpf_program__fd(prog); + i++; + } + + for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) { + maps[i] = bpf_object__find_map_by_name(obj, map_names[i]); + map_fd[i] = bpf_map__fd(maps[i]); + if (map_fd[i] < 0) { + fprintf(stderr, "load_bpf_file: (%i) %s\n", + map_fd[i], strerror(errno)); + return -1; + } + } + + return 0; +} + +static int __test_suite(char *bpf_file) +{ + int cg_fd, err; + + err = populate_progs(bpf_file); + if (err < 0) { + fprintf(stderr, "ERROR: (%i) load bpf failed\n", err); + return err; + } + + if (setup_cgroup_environment()) { + fprintf(stderr, "ERROR: cgroup env failed\n"); + return -EINVAL; + } + + cg_fd = create_and_get_cgroup(CG_PATH); + if (cg_fd < 0) { + fprintf(stderr, + "ERROR: (%i) open cg path failed: %s\n", + cg_fd, optarg); + return cg_fd; + } + + if (join_cgroup(CG_PATH)) { + fprintf(stderr, "ERROR: failed to join cgroup\n"); + return -EINVAL; + } + + /* Tests basic commands and APIs with range of iov values */ + txmsg_start = txmsg_end = 0; + err = test_txmsg(cg_fd); + if (err) + goto out; + + /* Tests interesting combinations of APIs used together */ + err = test_mixed(cg_fd); + if (err) + goto out; + + /* Tests pull_data API using start/end API */ + err = test_start_end(cg_fd); + if (err) + goto out; + +out: + printf("Summary: %i PASSED %i FAILED\n", passed, failed); + cleanup_cgroup_environment(); + close(cg_fd); + return err; +} + +static int test_suite(void) +{ + int err; + + err = __test_suite(BPF_SOCKMAP_FILENAME); + if (err) + goto out; + err = __test_suite(BPF_SOCKHASH_FILENAME); +out: + return err; +} + +int main(int argc, char **argv) +{ + int iov_count = 1, length = 1024, rate = 1; + struct sockmap_options options = {0}; + int opt, longindex, err, cg_fd = 0; + char *bpf_file = BPF_SOCKMAP_FILENAME; + int test = PING_PONG; + + if (argc < 2) + return test_suite(); + + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", + long_options, &longindex)) != -1) { + switch (opt) { + case 's': + txmsg_start = atoi(optarg); + break; + case 'e': + txmsg_end = atoi(optarg); + break; + case 'a': + txmsg_apply = atoi(optarg); + break; + case 'k': + txmsg_cork = atoi(optarg); + break; + case 'c': + cg_fd = open(optarg, O_DIRECTORY, O_RDONLY); + if (cg_fd < 0) { + fprintf(stderr, + "ERROR: (%i) open cg path failed: %s\n", + cg_fd, optarg); + return cg_fd; + } + break; + case 'r': + rate = atoi(optarg); + break; + case 'v': + options.verbose = 1; + break; + case 'i': + iov_count = atoi(optarg); + break; + case 'l': + length = atoi(optarg); + break; + case 'd': + options.data_test = true; + break; + case 't': + if (strcmp(optarg, "ping") == 0) { + test = PING_PONG; + } else if (strcmp(optarg, "sendmsg") == 0) { + test = SENDMSG; + } else if (strcmp(optarg, "base") == 0) { + test = BASE; + } else if (strcmp(optarg, "base_sendpage") == 0) { + test = BASE_SENDPAGE; + } else if (strcmp(optarg, "sendpage") == 0) { + test = SENDPAGE; + } else { + usage(argv); + return -1; + } + break; + case 0: + break; + case 'h': + default: + usage(argv); + return -1; + } + } + + if (!cg_fd) { + fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n", + argv[0]); + return -1; + } + + err = populate_progs(bpf_file); + if (err) { + fprintf(stderr, "populate program: (%s) %s\n", + bpf_file, strerror(errno)); + return 1; + } + running = 1; + + /* catch SIGINT */ + signal(SIGINT, running_handler); + + options.iov_count = iov_count; + options.iov_length = length; + options.rate = rate; + + err = run_options(&options, cg_fd, test); + close(cg_fd); + return err; +} + +void running_handler(int a) +{ + running = 0; +} diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c new file mode 100644 index 000000000000..677b2ed1cc1e --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockmap_kern.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io +#define SOCKMAP +#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP +#include "./test_sockmap_kern.h" diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h new file mode 100644 index 000000000000..8e8e41780bb9 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +/* Sockmap sample program connects a client and a backend together + * using cgroups. + * + * client:X <---> frontend:80 client:X <---> backend:80 + * + * For simplicity we hard code values here and bind 1:1. The hard + * coded values are part of the setup in sockmap.sh script that + * is associated with this BPF program. + * + * The bpf_printk is verbose and prints information as connections + * are established and verdicts are decided. + */ + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +struct bpf_map_def SEC("maps") sock_map = { + .type = TEST_MAP_TYPE, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_txmsg = { + .type = TEST_MAP_TYPE, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_redir = { + .type = TEST_MAP_TYPE, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_apply_bytes = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1 +}; + +struct bpf_map_def SEC("maps") sock_cork_bytes = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1 +}; + +struct bpf_map_def SEC("maps") sock_pull_bytes = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2 +}; + +struct bpf_map_def SEC("maps") sock_redir_flags = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1 +}; + +struct bpf_map_def SEC("maps") sock_skb_opts = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1 +}; + +SEC("sk_skb1") +int bpf_prog1(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb2") +int bpf_prog2(struct __sk_buff *skb) +{ + __u32 lport = skb->local_port; + __u32 rport = skb->remote_port; + int len, *f, ret, zero = 0; + __u64 flags = 0; + + if (lport == 10000) + ret = 10; + else + ret = 1; + + len = (__u32)skb->data_end - (__u32)skb->data; + f = bpf_map_lookup_elem(&sock_skb_opts, &zero); + if (f && *f) { + ret = 3; + flags = *f; + } + + bpf_printk("sk_skb2: redirect(%iB) flags=%i\n", + len, flags); +#ifdef SOCKMAP + return bpf_sk_redirect_map(skb, &sock_map, ret, flags); +#else + return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags); +#endif + +} + +SEC("sockops") +int bpf_sockmap(struct bpf_sock_ops *skops) +{ + __u32 lport, rport; + int op, err = 0, index, key, ret; + + + op = (int) skops->op; + + switch (op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + lport = skops->local_port; + rport = skops->remote_port; + + if (lport == 10000) { + ret = 1; +#ifdef SOCKMAP + err = bpf_sock_map_update(skops, &sock_map, &ret, + BPF_NOEXIST); +#else + err = bpf_sock_hash_update(skops, &sock_map, &ret, + BPF_NOEXIST); +#endif + bpf_printk("passive(%i -> %i) map ctx update err: %d\n", + lport, bpf_ntohl(rport), err); + } + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + lport = skops->local_port; + rport = skops->remote_port; + + if (bpf_ntohl(rport) == 10001) { + ret = 10; +#ifdef SOCKMAP + err = bpf_sock_map_update(skops, &sock_map, &ret, + BPF_NOEXIST); +#else + err = bpf_sock_hash_update(skops, &sock_map, &ret, + BPF_NOEXIST); +#endif + bpf_printk("active(%i -> %i) map ctx update err: %d\n", + lport, bpf_ntohl(rport), err); + } + break; + default: + break; + } + + return 0; +} + +SEC("sk_msg1") +int bpf_prog4(struct sk_msg_md *msg) +{ + int *bytes, zero = 0, one = 1; + int *start, *end; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + bpf_msg_cork_bytes(msg, *bytes); + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) + bpf_msg_pull_data(msg, *start, *end, 0); + return SK_PASS; +} + +SEC("sk_msg2") +int bpf_prog5(struct sk_msg_md *msg) +{ + int err1 = -1, err2 = -1, zero = 0, one = 1; + int *bytes, *start, *end, len1, len2; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + err1 = bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + err2 = bpf_msg_cork_bytes(msg, *bytes); + len1 = (__u64)msg->data_end - (__u64)msg->data; + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) { + int err; + + bpf_printk("sk_msg2: pull(%i:%i)\n", + start ? *start : 0, end ? *end : 0); + err = bpf_msg_pull_data(msg, *start, *end, 0); + if (err) + bpf_printk("sk_msg2: pull_data err %i\n", + err); + len2 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length update %i->%i\n", + len1, len2); + } + bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", + len1, err1, err2); + return SK_PASS; +} + +SEC("sk_msg3") +int bpf_prog6(struct sk_msg_md *msg) +{ + int *bytes, zero = 0, one = 1, key = 0; + int *start, *end, *f; + __u64 flags = 0; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + bpf_msg_cork_bytes(msg, *bytes); + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) + bpf_msg_pull_data(msg, *start, *end, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); + if (f && *f) { + key = 2; + flags = *f; + } +#ifdef SOCKMAP + return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags); +#else + return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags); +#endif +} + +SEC("sk_msg4") +int bpf_prog7(struct sk_msg_md *msg) +{ + int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; + int *f, *bytes, *start, *end, len1, len2; + __u64 flags = 0; + + int err; + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + err1 = bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + err2 = bpf_msg_cork_bytes(msg, *bytes); + len1 = (__u64)msg->data_end - (__u64)msg->data; + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) { + + bpf_printk("sk_msg2: pull(%i:%i)\n", + start ? *start : 0, end ? *end : 0); + err = bpf_msg_pull_data(msg, *start, *end, 0); + if (err) + bpf_printk("sk_msg2: pull_data err %i\n", + err); + len2 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length update %i->%i\n", + len1, len2); + } + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); + if (f && *f) { + key = 2; + flags = *f; + } + bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n", + len1, flags, err1 ? err1 : err2); +#ifdef SOCKMAP + err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags); +#else + err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags); +#endif + bpf_printk("sk_msg3: err %i\n", err); + return err; +} + +SEC("sk_msg5") +int bpf_prog8(struct sk_msg_md *msg) +{ + void *data_end = (void *)(long) msg->data_end; + void *data = (void *)(long) msg->data; + int ret = 0, *bytes, zero = 0; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) { + ret = bpf_msg_apply_bytes(msg, *bytes); + if (ret) + return SK_DROP; + } else { + return SK_DROP; + } + return SK_PASS; +} +SEC("sk_msg6") +int bpf_prog9(struct sk_msg_md *msg) +{ + void *data_end = (void *)(long) msg->data_end; + void *data = (void *)(long) msg->data; + int ret = 0, *bytes, zero = 0; + + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) { + if (((__u64)data_end - (__u64)data) >= *bytes) + return SK_PASS; + ret = bpf_msg_cork_bytes(msg, *bytes); + if (ret) + return SK_DROP; + } + return SK_PASS; +} + +SEC("sk_msg7") +int bpf_prog10(struct sk_msg_md *msg) +{ + int *bytes, zero = 0, one = 1; + int *start, *end; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + bpf_msg_cork_bytes(msg, *bytes); + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) + bpf_msg_pull_data(msg, *start, *end, 0); + + return SK_DROP; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c index b755bd783ce5..d86c281e957f 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c @@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(__u32), .value_size = sizeof(__u32), - .max_entries = 10000, + .max_entries = 16384, }; struct bpf_map_def SEC("maps") stackmap = { @@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = { .map_flags = BPF_F_STACK_BUILD_ID, }; +struct bpf_map_def SEC("maps") stack_amap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH, + .max_entries = 128, +}; + /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ struct random_urandom_args { unsigned long long pad; @@ -42,7 +50,10 @@ struct random_urandom_args { SEC("tracepoint/random/urandom_read") int oncpu(struct random_urandom_args *args) { + __u32 max_len = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH; __u32 key = 0, val = 0, *value_p; + void *stack_p; value_p = bpf_map_lookup_elem(&control_map, &key); if (value_p && *value_p) @@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args) /* The size of stackmap and stackid_hmap should be the same */ key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); - if ((int)key >= 0) + if ((int)key >= 0) { bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + stack_p = bpf_map_lookup_elem(&stack_amap, &key); + if (stack_p) + bpf_get_stack(args, stack_p, max_len, + BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); + } return 0; } diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c index 76d85c5d08bd..af111af7ca1a 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c @@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(__u32), .value_size = sizeof(__u32), - .max_entries = 10000, + .max_entries = 16384, }; struct bpf_map_def SEC("maps") stackmap = { .type = BPF_MAP_TYPE_STACK_TRACE, .key_size = sizeof(__u32), .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, - .max_entries = 10000, + .max_entries = 16384, +}; + +struct bpf_map_def SEC("maps") stack_amap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 16384, }; /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ @@ -44,7 +51,9 @@ struct sched_switch_args { SEC("tracepoint/sched/sched_switch") int oncpu(struct sched_switch_args *ctx) { + __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); __u32 key = 0, val = 0, *value_p; + void *stack_p; value_p = bpf_map_lookup_elem(&control_map, &key); if (value_p && *value_p) @@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx) /* The size of stackmap and stackid_hmap should be the same */ key = bpf_get_stackid(ctx, &stackmap, 0); - if ((int)key >= 0) + if ((int)key >= 0) { bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + stack_p = bpf_map_lookup_elem(&stack_amap, &key); + if (stack_p) + bpf_get_stack(ctx, stack_p, max_len, 0); + } return 0; } diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh new file mode 100755 index 000000000000..546aee3e9fb4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -0,0 +1,731 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# End-to-end eBPF tunnel test suite +# The script tests BPF network tunnel implementation. +# +# Topology: +# --------- +# root namespace | at_ns0 namespace +# | +# ----------- | ----------- +# | tnl dev | | | tnl dev | (overlay network) +# ----------- | ----------- +# metadata-mode | native-mode +# with bpf | +# | +# ---------- | ---------- +# | veth1 | --------- | veth0 | (underlay network) +# ---------- peer ---------- +# +# +# Device Configuration +# -------------------- +# Root namespace with metadata-mode tunnel + BPF +# Device names and addresses: +# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) +# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay) +# +# Namespace at_ns0 with native tunnel +# Device names and addresses: +# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) +# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay) +# +# +# End-to-end ping packet flow +# --------------------------- +# Most of the tests start by namespace creation, device configuration, +# then ping the underlay and overlay network. When doing 'ping 10.1.1.100' +# from root namespace, the following operations happen: +# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev. +# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata, +# with remote_ip=172.16.1.200 and others. +# 3) Outer tunnel header is prepended and route the packet to veth1's egress +# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0 +# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet +# 6) Forward the packet to the overlay tnl dev + +PING_ARG="-c 3 -w 10 -q" +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +config_device() +{ + ip netns add at_ns0 + ip link add veth0 type veth peer name veth1 + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip link set dev veth1 up mtu 1500 + ip addr add dev veth1 172.16.1.200/24 +} + +add_gre_tunnel() +{ + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE key 2 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ip6gretap_tunnel() +{ + + # assign ipv6 address + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \ + local ::11 remote ::22 + + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip addr add dev $DEV fc80::200/24 + ip link set dev $DEV up +} + +add_erspan_tunnel() +{ + # at_ns0 namespace + if [ "$1" == "v1" ]; then + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 1 erspan 123 + else + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 2 erspan_dir egress erspan_hwid 3 + fi + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ip6erspan_tunnel() +{ + + # assign ipv6 address + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + if [ "$1" == "v1" ]; then + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local ::11 remote ::22 \ + erspan_ver 1 erspan 123 + else + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local ::11 remote ::22 \ + erspan_ver 2 erspan_dir egress erspan_hwid 7 + fi + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +add_vxlan_tunnel() +{ + # Set static ARP entry here because iptables set-mark works + # on L3 packet, as a result not applying to ARP packets, + # causing errors at get_tunnel_{key/opt}. + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + id 2 dstport 4789 gbp remote 172.16.1.200 + ip netns exec at_ns0 \ + ip link set dev $DEV_NS address 52:54:00:d9:01:00 up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 + ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF + + # root namespace + ip link add dev $DEV type $TYPE external gbp dstport 4789 + ip link set dev $DEV address 52:54:00:d9:02:00 up + ip addr add dev $DEV 10.1.1.200/24 + arp -s 10.1.1.100 52:54:00:d9:01:00 +} + +add_ip6vxlan_tunnel() +{ + #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0 + ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + #ip -4 addr del 172.16.1.200 dev veth1 + ip -6 addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \ + local ::11 remote ::22 + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external dstport 4789 + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +add_geneve_tunnel() +{ + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + id 2 dstport 6081 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE dstport 6081 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ip6geneve_tunnel() +{ + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 22 \ + remote ::22 # geneve has no local option + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +add_ipip_tunnel() +{ + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ipip6tnl_tunnel() +{ + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + local ::11 remote ::22 + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +test_gre() +{ + TYPE=gretap + DEV_NS=gretap00 + DEV=gretap11 + ret=0 + + check $TYPE + config_device + add_gre_tunnel + attach_bpf $DEV gre_set_tunnel gre_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6gre() +{ + TYPE=ip6gre + DEV_NS=ip6gre00 + DEV=ip6gre11 + ret=0 + + check $TYPE + config_device + # reuse the ip6gretap function + add_ip6gretap_tunnel + attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # overlay: ipv4 over ipv6 + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + ping $PING_ARG 10.1.1.100 + check_err $? + # overlay: ipv6 over ipv6 + ip netns exec at_ns0 ping6 $PING_ARG fc80::200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6gretap() +{ + TYPE=ip6gretap + DEV_NS=ip6gretap00 + DEV=ip6gretap11 + ret=0 + + check $TYPE + config_device + add_ip6gretap_tunnel + attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # overlay: ipv4 over ipv6 + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + ping $PING_ARG 10.1.1.100 + check_err $? + # overlay: ipv6 over ipv6 + ip netns exec at_ns0 ping6 $PING_ARG fc80::200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_erspan() +{ + TYPE=erspan + DEV_NS=erspan00 + DEV=erspan11 + ret=0 + + check $TYPE + config_device + add_erspan_tunnel $1 + attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6erspan() +{ + TYPE=ip6erspan + DEV_NS=ip6erspan00 + DEV=ip6erspan11 + ret=0 + + check $TYPE + config_device + add_ip6erspan_tunnel $1 + attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel + ping6 $PING_ARG ::11 + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_vxlan() +{ + TYPE=vxlan + DEV_NS=vxlan00 + DEV=vxlan11 + ret=0 + + check $TYPE + config_device + add_vxlan_tunnel + attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6vxlan() +{ + TYPE=vxlan + DEV_NS=ip6vxlan00 + DEV=ip6vxlan11 + ret=0 + + check $TYPE + config_device + add_ip6vxlan_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # ip4 over ip6 + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: ip6$TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: ip6$TYPE"${NC} +} + +test_geneve() +{ + TYPE=geneve + DEV_NS=geneve00 + DEV=geneve11 + ret=0 + + check $TYPE + config_device + add_geneve_tunnel + attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6geneve() +{ + TYPE=geneve + DEV_NS=ip6geneve00 + DEV=ip6geneve11 + ret=0 + + check $TYPE + config_device + add_ip6geneve_tunnel + attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: ip6$TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: ip6$TYPE"${NC} +} + +test_ipip() +{ + TYPE=ipip + DEV_NS=ipip00 + DEV=ipip11 + ret=0 + + check $TYPE + config_device + add_ipip_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ipip6() +{ + TYPE=ip6tnl + DEV_NS=ipip6tnl00 + DEV=ipip6tnl11 + ret=0 + + check $TYPE + config_device + add_ipip6tnl_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # ip4 over ip6 + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +setup_xfrm_tunnel() +{ + auth=0x$(printf '1%.0s' {1..40}) + enc=0x$(printf '2%.0s' {1..32}) + spi_in_to_out=0x1 + spi_out_to_in=0x2 + # at_ns0 namespace + # at_ns0 -> root + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ + tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ + mode tunnel + # root -> at_ns0 + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip netns exec at_ns0 \ + ip addr add dev veth0 10.1.1.100/32 + ip netns exec at_ns0 \ + ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ + src 10.1.1.100 + + # root namespace + # at_ns0 -> root + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ + tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ + mode tunnel + # root -> at_ns0 + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip addr add dev veth1 10.1.1.200/32 + ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 +} + +test_xfrm_tunnel() +{ + config_device + > /sys/kernel/debug/tracing/trace + setup_xfrm_tunnel + tc qdisc add dev veth1 clsact + tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \ + sec xfrm_get_state + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + sleep 1 + grep "reqid 1" /sys/kernel/debug/tracing/trace + check_err $? + grep "spi 0x1" /sys/kernel/debug/tracing/trace + check_err $? + grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: xfrm tunnel"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: xfrm tunnel"${NC} +} + +attach_bpf() +{ + DEV=$1 + SET=$2 + GET=$3 + tc qdisc add dev $DEV clsact + tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET + tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET +} + +cleanup() +{ + ip netns delete at_ns0 2> /dev/null + ip link del veth1 2> /dev/null + ip link del ipip11 2> /dev/null + ip link del ipip6tnl11 2> /dev/null + ip link del gretap11 2> /dev/null + ip link del ip6gre11 2> /dev/null + ip link del ip6gretap11 2> /dev/null + ip link del vxlan11 2> /dev/null + ip link del ip6vxlan11 2> /dev/null + ip link del geneve11 2> /dev/null + ip link del ip6geneve11 2> /dev/null + ip link del erspan11 2> /dev/null + ip link del ip6erspan11 2> /dev/null + ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null + ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null + ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null + ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null +} + +cleanup_exit() +{ + echo "CATCH SIGKILL or SIGINT, cleanup and exit" + cleanup + exit 0 +} + +check() +{ + ip link help 2>&1 | grep -q "\s$1\s" + if [ $? -ne 0 ];then + echo "SKIP $1: iproute2 not support" + cleanup + return 1 + fi +} + +enable_debug() +{ + echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control +} + +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + +bpf_tunnel_test() +{ + echo "Testing GRE tunnel..." + test_gre + echo "Testing IP6GRE tunnel..." + test_ip6gre + echo "Testing IP6GRETAP tunnel..." + test_ip6gretap + echo "Testing ERSPAN tunnel..." + test_erspan v2 + echo "Testing IP6ERSPAN tunnel..." + test_ip6erspan v2 + echo "Testing VXLAN tunnel..." + test_vxlan + echo "Testing IP6VXLAN tunnel..." + test_ip6vxlan + echo "Testing GENEVE tunnel..." + test_geneve + echo "Testing IP6GENEVE tunnel..." + test_ip6geneve + echo "Testing IPIP tunnel..." + test_ipip + echo "Testing IPIP6 tunnel..." + test_ipip6 + echo "Testing IPSec tunnel..." + test_xfrm_tunnel +} + +trap cleanup 0 3 6 +trap cleanup_exit 2 9 + +cleanup +bpf_tunnel_test + +exit 0 diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c new file mode 100644 index 000000000000..504df69c83df --- /dev/null +++ b/tools/testing/selftests/bpf/test_tunnel_kern.c @@ -0,0 +1,713 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2016 VMware + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <string.h> +#include <arpa/inet.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/tcp.h> +#include <linux/socket.h> +#include <linux/pkt_cls.h> +#include <linux/erspan.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define ERROR(ret) do {\ + char fmt[] = "ERROR line:%d ret:%d\n";\ + bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ + } while (0) + +int _version SEC("version") = 1; + +struct geneve_opt { + __be16 opt_class; + __u8 type; + __u8 length:5; + __u8 r3:1; + __u8 r2:1; + __u8 r1:1; + __u8 opt_data[8]; /* hard-coded to 8 byte */ +}; + +struct vxlan_metadata { + __u32 gbp; +}; + +SEC("gre_set_tunnel") +int _gre_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("gre_get_tunnel") +int _gre_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "key %d remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4); + return TC_ACT_OK; +} + +SEC("ip6gretap_set_tunnel") +int _ip6gretap_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + key.tunnel_label = 0xabcde; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | + BPF_F_SEQ_NUMBER); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6gretap_get_tunnel") +int _ip6gretap_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip6 ::%x label %x\n"; + struct bpf_tunnel_key key; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + + return TC_ACT_OK; +} + +SEC("erspan_set_tunnel") +int _erspan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct erspan_metadata md; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&md, 0, sizeof(md)); +#ifdef ERSPAN_V1 + md.version = 1; + md.u.index = bpf_htonl(123); +#else + __u8 direction = 1; + __u8 hwid = 7; + + md.version = 2; + md.u.md2.dir = direction; + md.u.md2.hwid = hwid & 0xf; + md.u.md2.hwid_upper = (hwid >> 4) & 0x3; +#endif + + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("erspan_get_tunnel") +int _erspan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip 0x%x erspan version %d\n"; + struct bpf_tunnel_key key; + struct erspan_metadata md; + __u32 index; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.version); + +#ifdef ERSPAN_V1 + char fmt2[] = "\tindex %x\n"; + + index = bpf_ntohl(md.u.index); + bpf_trace_printk(fmt2, sizeof(fmt2), index); +#else + char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; + + bpf_trace_printk(fmt2, sizeof(fmt2), + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); +#endif + + return TC_ACT_OK; +} + +SEC("ip4ip6erspan_set_tunnel") +int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct erspan_metadata md; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&md, 0, sizeof(md)); + +#ifdef ERSPAN_V1 + md.u.index = bpf_htonl(123); + md.version = 1; +#else + __u8 direction = 0; + __u8 hwid = 17; + + md.version = 2; + md.u.md2.dir = direction; + md.u.md2.hwid = hwid & 0xf; + md.u.md2.hwid_upper = (hwid >> 4) & 0x3; +#endif + + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip4ip6erspan_get_tunnel") +int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n"; + struct bpf_tunnel_key key; + struct erspan_metadata md; + __u32 index; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.version); + +#ifdef ERSPAN_V1 + char fmt2[] = "\tindex %x\n"; + + index = bpf_ntohl(md.u.index); + bpf_trace_printk(fmt2, sizeof(fmt2), index); +#else + char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; + + bpf_trace_printk(fmt2, sizeof(fmt2), + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); +#endif + + return TC_ACT_OK; +} + +SEC("vxlan_set_tunnel") +int _vxlan_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("vxlan_get_tunnel") +int _vxlan_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.gbp); + + return TC_ACT_OK; +} + +SEC("ip6vxlan_set_tunnel") +int _ip6vxlan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6vxlan_get_tunnel") +int _ip6vxlan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip6 ::%x label %x\n"; + struct bpf_tunnel_key key; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + + return TC_ACT_OK; +} + +SEC("geneve_set_tunnel") +int _geneve_set_tunnel(struct __sk_buff *skb) +{ + int ret, ret2; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + __builtin_memset(&gopt, 0x0, sizeof(gopt)); + gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ + gopt.type = 0x08; + gopt.r1 = 0; + gopt.r2 = 0; + gopt.r3 = 0; + gopt.length = 2; /* 4-byte multiple */ + *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef); + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("geneve_get_tunnel") +int _geneve_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, gopt.opt_class); + return TC_ACT_OK; +} + +SEC("ip6geneve_set_tunnel") +int _ip6geneve_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct geneve_opt gopt; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&gopt, 0x0, sizeof(gopt)); + gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ + gopt.type = 0x08; + gopt.r1 = 0; + gopt.r2 = 0; + gopt.r3 = 0; + gopt.length = 2; /* 4-byte multiple */ + *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef); + + ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6geneve_get_tunnel") +int _ip6geneve_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, gopt.opt_class); + + return TC_ACT_OK; +} + +SEC("ipip_set_tunnel") +int _ipip_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) + return TC_ACT_SHOT; + + if (tcp->dest == bpf_htons(5200)) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + else if (tcp->dest == bpf_htons(5201)) + key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ + else + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip_get_tunnel") +int _ipip_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); + return TC_ACT_OK; +} + +SEC("ipip6_set_tunnel") +int _ipip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip6_get_tunnel") +int _ipip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("ip6ip6_set_tunnel") +int _ip6ip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct ipv6hdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = bpf_htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) { + key.remote_ipv6[3] = bpf_htonl(1); + } else { + if (iph->nexthdr != 6 /* NEXTHDR_TCP */) { + ERROR(iph->nexthdr); + return TC_ACT_SHOT; + } + + if (tcp->dest == bpf_htons(5200)) { + key.remote_ipv6[3] = bpf_htonl(1); + } else if (tcp->dest == bpf_htons(5201)) { + key.remote_ipv6[3] = bpf_htonl(2); + } else { + ERROR(tcp->dest); + return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6ip6_get_tunnel") +int _ip6ip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("xfrm_get_state") +int _xfrm_get_state(struct __sk_buff *skb) +{ + struct bpf_xfrm_state x; + char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; + int ret; + + ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); + if (ret < 0) + return TC_ACT_OK; + + bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), + bpf_ntohl(x.remote_ipv4)); + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index fd7de7eb329e..f5f7bcc96046 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -41,15 +41,16 @@ # endif #endif #include "bpf_rlimit.h" +#include "bpf_rand.h" #include "../../../include/linux/filter.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif -#define MAX_INSNS 512 +#define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 4 +#define MAX_NR_MAPS 7 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -64,7 +65,10 @@ struct bpf_test { struct bpf_insn insns[MAX_INSNS]; int fixup_map1[MAX_FIXUPS]; int fixup_map2[MAX_FIXUPS]; - int fixup_prog[MAX_FIXUPS]; + int fixup_map3[MAX_FIXUPS]; + int fixup_map4[MAX_FIXUPS]; + int fixup_prog1[MAX_FIXUPS]; + int fixup_prog2[MAX_FIXUPS]; int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; @@ -76,6 +80,8 @@ struct bpf_test { } result, result_unpriv; enum bpf_prog_type prog_type; uint8_t flags; + __u8 data[TEST_DATA_LEN]; + void (*fill_helper)(struct bpf_test *self); }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -88,6 +94,91 @@ struct test_val { int foo[MAX_ENTRIES]; }; +struct other_val { + long long foo; + long long bar; +}; + +static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) +{ + /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */ +#define PUSH_CNT 51 + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn = self->insns; + int i = 0, j, k = 0; + + insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); +loop: + for (j = 0; j < PUSH_CNT; j++) { + insn[i++] = BPF_LD_ABS(BPF_B, 0); + insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); + i++; + insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1); + insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); + i++; + } + + for (j = 0; j < PUSH_CNT; j++) { + insn[i++] = BPF_LD_ABS(BPF_B, 0); + insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); + i++; + insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_pop), + insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); + i++; + } + if (++k < 5) + goto loop; + + for (; i < len - 1; i++) + insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef); + insn[len - 1] = BPF_EXIT_INSN(); +} + +static void bpf_fill_jump_around_ld_abs(struct bpf_test *self) +{ + struct bpf_insn *insn = self->insns; + unsigned int len = BPF_MAXINSNS; + int i = 0; + + insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + insn[i++] = BPF_LD_ABS(BPF_B, 0); + insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2); + i++; + while (i < len - 1) + insn[i++] = BPF_LD_ABS(BPF_B, 1); + insn[i] = BPF_EXIT_INSN(); +} + +static void bpf_fill_rand_ld_dw(struct bpf_test *self) +{ + struct bpf_insn *insn = self->insns; + uint64_t res = 0; + int i = 0; + + insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0); + while (i < self->retval) { + uint64_t val = bpf_semi_rand_get(); + struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) }; + + res ^= val; + insn[i++] = tmp[0]; + insn[i++] = tmp[1]; + insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1); + } + insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); + insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32); + insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1); + insn[i] = BPF_EXIT_INSN(); + res ^= (res >> 32); + self->retval = (uint32_t)res; +} + static struct bpf_test tests[] = { { "add+sub+mul", @@ -1597,6 +1688,121 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SK_SKB, }, { + "valid access family in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, family)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access local_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access local_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access local_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "invalid 64B read of family in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, family)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid read past end of SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, local_port) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid read offset in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, family) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { "direct packet read for SK_MSG", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, @@ -2565,7 +2771,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_prog = { 1 }, + .fixup_prog1 = { 1 }, .errstr_unpriv = "R3 leaks addr into helper", .result_unpriv = REJECT, .result = ACCEPT, @@ -2652,7 +2858,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .fixup_prog = { 1 }, + .fixup_prog1 = { 1 }, .result = ACCEPT, .retval = 42, }, @@ -2666,7 +2872,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .fixup_prog = { 1 }, + .fixup_prog1 = { 1 }, .result = ACCEPT, .retval = 41, }, @@ -2680,7 +2886,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .fixup_prog = { 1 }, + .fixup_prog1 = { 1 }, .result = ACCEPT, .retval = 1, }, @@ -2694,7 +2900,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .fixup_prog = { 1 }, + .fixup_prog1 = { 1 }, .result = ACCEPT, .retval = 2, }, @@ -2708,7 +2914,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .fixup_prog = { 1 }, + .fixup_prog1 = { 1 }, .result = ACCEPT, .retval = 2, }, @@ -2722,7 +2928,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .fixup_prog = { 2 }, + .fixup_prog1 = { 2 }, .result = ACCEPT, .retval = 42, }, @@ -4769,6 +4975,24 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_LWT_XMIT, }, { + "make headroom for LWT_XMIT", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 34), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_skb_change_head), + /* split for s390 to succeed */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 42), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_skb_change_head), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, + }, + { "invalid access of tc_classid for LWT_IN", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, @@ -5594,6 +5818,257 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { + "map lookup helper access to map", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 8 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map update helper access to map", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_update_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map update helper access to map: wrong size", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_update_elem), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .fixup_map3 = { 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=0 size=16", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const imm)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, + offsetof(struct other_val, bar)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 9 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const imm): out-of-bound 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, + sizeof(struct other_val) - 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 9 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=12 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const imm): out-of-bound 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 9 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=-4 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const reg)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + offsetof(struct other_val, bar)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const reg): out-of-bound 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + sizeof(struct other_val) - 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=12 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const reg): out-of-bound 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, -4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=-4 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via variable)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct other_val, bar), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 11 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via variable): no max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = REJECT, + .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via variable): wrong max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct other_val, bar) + 1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 11 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=9 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { "map element value is preserved across register spilling", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -8190,7 +8665,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .errstr = "dereference of modified ctx ptr", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -11227,6 +11702,112 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_XDP, }, { + "calls: two calls returning different map pointers for lookup (hash, array)", + .insns = { + /* main prog */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_CALL_REL(11), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_CALL_REL(12), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* subprog 1 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* subprog 2 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map2 = { 13 }, + .fixup_map4 = { 16 }, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: two calls returning different map pointers for lookup (hash, map in map)", + .insns = { + /* main prog */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_CALL_REL(11), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_CALL_REL(12), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* subprog 1 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* subprog 2 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_in_map = { 16 }, + .fixup_map4 = { 13 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'map_ptr'", + }, + { + "cond: two branches returning different map pointers for lookup (tail, tail)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5 }, + .fixup_prog2 = { 2 }, + .result_unpriv = REJECT, + .errstr_unpriv = "tail_call abusing map_ptr", + .result = ACCEPT, + .retval = 42, + }, + { + "cond: two branches returning same map pointers for lookup (tail, tail)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog2 = { 2, 5 }, + .result_unpriv = ACCEPT, + .result = ACCEPT, + .retval = 42, + }, + { "search pruning: all branches should be verified (nop operation)", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -11423,6 +12004,334 @@ static struct bpf_test tests[] = { .errstr = "BPF_XADD stores into R2 packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "bpf_get_stack return R0 within range", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_4, 256), + BPF_EMIT_CALL(BPF_FUNC_get_stack), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), + BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16), + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_9), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_get_stack), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "ld_abs: invalid op 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_DW, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "unknown opcode", + }, + { + "ld_abs: invalid op 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 256), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_IND(BPF_DW, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "unknown opcode", + }, + { + "ld_abs: nmap reduced", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26), + BPF_MOV32_IMM(BPF_REG_0, 18), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64), + BPF_LD_IND(BPF_W, BPF_REG_7, 14), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60), + BPF_MOV32_IMM(BPF_REG_0, 280971478), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60), + BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13), + BPF_MOV32_IMM(BPF_REG_0, 22), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), + BPF_LD_IND(BPF_H, BPF_REG_7, 14), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52), + BPF_MOV32_IMM(BPF_REG_0, 17366), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52), + BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV32_IMM(BPF_REG_0, 256), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .data = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 256, + }, + { + "ld_abs: div + abs, test 1", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), + BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), + BPF_LD_ABS(BPF_B, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_LD_IND(BPF_B, BPF_REG_8, -70), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 10, + }, + { + "ld_abs: div + abs, test 2", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), + BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), + BPF_LD_ABS(BPF_B, 128), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_LD_IND(BPF_B, BPF_REG_8, -70), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "ld_abs: div + abs, test 3", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "ld_abs: div + abs, test 4", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), + BPF_LD_ABS(BPF_B, 256), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "ld_abs: vlan + abs, test 1", + .insns = { }, + .data = { + 0x34, + }, + .fill_helper = bpf_fill_ld_abs_vlan_push_pop, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0xbef, + }, + { + "ld_abs: vlan + abs, test 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .data = { + 0x34, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 42, + }, + { + "ld_abs: jump around ld_abs", + .insns = { }, + .data = { + 10, 11, + }, + .fill_helper = bpf_fill_jump_around_ld_abs, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 10, + }, + { + "ld_dw: xor semi-random 64 bit imms, test 1", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 4090, + }, + { + "ld_dw: xor semi-random 64 bit imms, test 2", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2047, + }, + { + "ld_dw: xor semi-random 64 bit imms, test 3", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 511, + }, + { + "ld_dw: xor semi-random 64 bit imms, test 4", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 5, + }, + { + "pass unmodified ctx pointer to helper", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "pass modified ctx pointer to helper, 1", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "pass modified ctx pointer to helper, 2", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr_unpriv = "dereference of modified ctx ptr", + .errstr = "dereference of modified ctx ptr", + }, + { + "pass modified ctx pointer to helper, 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "variable ctx access var_off=(0x0; 0x4)", + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -11435,12 +12344,13 @@ static int probe_filter_length(const struct bpf_insn *fp) return len + 1; } -static int create_map(uint32_t size_value, uint32_t max_elem) +static int create_map(uint32_t type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem) { int fd; - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(long long), - size_value, max_elem, BPF_F_NO_PREALLOC); + fd = bpf_create_map(type, size_key, size_value, max_elem, + type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0); if (fd < 0) printf("Failed to create hash map '%s'!\n", strerror(errno)); @@ -11473,13 +12383,13 @@ static int create_prog_dummy2(int mfd, int idx) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(void) +static int create_prog_array(uint32_t max_elem, int p1key) { - int p1key = 0, p2key = 1; + int p2key = 1; int mfd, p1fd, p2fd; mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), - sizeof(int), 4, 0); + sizeof(int), max_elem, 0); if (mfd < 0) { printf("Failed to create prog array '%s'!\n", strerror(errno)); return -1; @@ -11526,22 +12436,29 @@ static int create_map_in_map(void) return outer_map_fd; } -static char bpf_vlog[32768]; +static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, int *map_fds) { int *fixup_map1 = test->fixup_map1; int *fixup_map2 = test->fixup_map2; - int *fixup_prog = test->fixup_prog; + int *fixup_map3 = test->fixup_map3; + int *fixup_map4 = test->fixup_map4; + int *fixup_prog1 = test->fixup_prog1; + int *fixup_prog2 = test->fixup_prog2; int *fixup_map_in_map = test->fixup_map_in_map; + if (test->fill_helper) + test->fill_helper(test); + /* Allocating HTs with 1 elem is fine here, since we only test * for verifier and not do a runtime lookup, so the only thing * that really matters is value size in this case. */ if (*fixup_map1) { - map_fds[0] = create_map(sizeof(long long), 1); + map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), + sizeof(long long), 1); do { prog[*fixup_map1].imm = map_fds[0]; fixup_map1++; @@ -11549,25 +12466,52 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_map2) { - map_fds[1] = create_map(sizeof(struct test_val), 1); + map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), + sizeof(struct test_val), 1); do { prog[*fixup_map2].imm = map_fds[1]; fixup_map2++; } while (*fixup_map2); } - if (*fixup_prog) { - map_fds[2] = create_prog_array(); + if (*fixup_map3) { + map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), + sizeof(struct other_val), 1); + do { + prog[*fixup_map3].imm = map_fds[2]; + fixup_map3++; + } while (*fixup_map3); + } + + if (*fixup_map4) { + map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), + sizeof(struct test_val), 1); + do { + prog[*fixup_map4].imm = map_fds[3]; + fixup_map4++; + } while (*fixup_map4); + } + + if (*fixup_prog1) { + map_fds[4] = create_prog_array(4, 0); do { - prog[*fixup_prog].imm = map_fds[2]; - fixup_prog++; - } while (*fixup_prog); + prog[*fixup_prog1].imm = map_fds[4]; + fixup_prog1++; + } while (*fixup_prog1); + } + + if (*fixup_prog2) { + map_fds[5] = create_prog_array(8, 7); + do { + prog[*fixup_prog2].imm = map_fds[5]; + fixup_prog2++; + } while (*fixup_prog2); } if (*fixup_map_in_map) { - map_fds[3] = create_map_in_map(); + map_fds[6] = create_map_in_map(); do { - prog[*fixup_map_in_map].imm = map_fds[3]; + prog[*fixup_map_in_map].imm = map_fds[6]; fixup_map_in_map++; } while (*fixup_map_in_map); } @@ -11577,10 +12521,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { int fd_prog, expected_ret, reject_from_alignment; + int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; - int prog_len = probe_filter_length(prog); - char data_in[TEST_DATA_LEN] = {}; - int prog_type = test->prog_type; int map_fds[MAX_NR_MAPS]; const char *expected_err; uint32_t retval; @@ -11590,6 +12532,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, map_fds[i] = -1; do_test_fixup(test, prog, map_fds); + prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, @@ -11629,8 +12572,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } if (fd_prog >= 0) { - err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), - NULL, NULL, &retval, NULL); + __u8 tmp[TEST_DATA_LEN << 2]; + __u32 size_tmp = sizeof(tmp); + + err = bpf_prog_test_run(fd_prog, 1, test->data, + sizeof(test->data), tmp, &size_tmp, + &retval, NULL); if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { printf("Unexpected bpf_prog_test_run error\n"); goto fail_log; @@ -11788,5 +12735,6 @@ int main(int argc, char **argv) return EXIT_FAILURE; } + bpf_semi_rand_init(); return do_test(unpriv, from, to); } diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c new file mode 100644 index 000000000000..3868dcb63420 --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <poll.h> +#include <unistd.h> +#include <linux/perf_event.h> +#include <sys/mman.h> +#include "trace_helpers.h" + +#define MAX_SYMS 300000 +static struct ksym syms[MAX_SYMS]; +static int sym_cnt; + +static int ksym_cmp(const void *p1, const void *p2) +{ + return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; +} + +int load_kallsyms(void) +{ + FILE *f = fopen("/proc/kallsyms", "r"); + char func[256], buf[256]; + char symbol; + void *addr; + int i = 0; + + if (!f) + return -ENOENT; + + while (!feof(f)) { + if (!fgets(buf, sizeof(buf), f)) + break; + if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) + break; + if (!addr) + continue; + syms[i].addr = (long) addr; + syms[i].name = strdup(func); + i++; + } + sym_cnt = i; + qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); + return 0; +} + +struct ksym *ksym_search(long key) +{ + int start = 0, end = sym_cnt; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = key - syms[mid].addr; + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return &syms[mid]; + } + + if (start >= 1 && syms[start - 1].addr < key && + key < syms[start].addr) + /* valid ksym */ + return &syms[start - 1]; + + /* out of range. return _stext */ + return &syms[0]; +} + +long ksym_get_addr(const char *name) +{ + int i; + + for (i = 0; i < sym_cnt; i++) { + if (strcmp(syms[i].name, name) == 0) + return syms[i].addr; + } + + return 0; +} + +static int page_size; +static int page_cnt = 8; +static struct perf_event_mmap_page *header; + +int perf_event_mmap(int fd) +{ + void *base; + int mmap_size; + + page_size = getpagesize(); + mmap_size = page_size * (page_cnt + 1); + + base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (base == MAP_FAILED) { + printf("mmap err\n"); + return -1; + } + + header = base; + return 0; +} + +static int perf_event_poll(int fd) +{ + struct pollfd pfd = { .fd = fd, .events = POLLIN }; + + return poll(&pfd, 1, 1000); +} + +struct perf_event_sample { + struct perf_event_header header; + __u32 size; + char data[]; +}; + +static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) +{ + struct perf_event_sample *e = event; + perf_event_print_fn fn = priv; + int ret; + + if (e->header.type == PERF_RECORD_SAMPLE) { + ret = fn(e->data, e->size); + if (ret != LIBBPF_PERF_EVENT_CONT) + return ret; + } else if (e->header.type == PERF_RECORD_LOST) { + struct { + struct perf_event_header header; + __u64 id; + __u64 lost; + } *lost = (void *) e; + printf("lost %lld events\n", lost->lost); + } else { + printf("unknown event type=%d size=%d\n", + e->header.type, e->header.size); + } + + return LIBBPF_PERF_EVENT_CONT; +} + +int perf_event_poller(int fd, perf_event_print_fn output_fn) +{ + enum bpf_perf_event_ret ret; + void *buf = NULL; + size_t len = 0; + + for (;;) { + perf_event_poll(fd); + ret = bpf_perf_event_read_simple(header, page_cnt * page_size, + page_size, &buf, &len, + bpf_perf_event_print, + output_fn); + if (ret != LIBBPF_PERF_EVENT_CONT) + break; + } + free(buf); + + return ret; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h new file mode 100644 index 000000000000..3b4bcf7f5084 --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TRACE_HELPER_H +#define __TRACE_HELPER_H + +#include <libbpf.h> + +struct ksym { + long addr; + char *name; +}; + +int load_kallsyms(void); +struct ksym *ksym_search(long key); +long ksym_get_addr(const char *name); + +typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size); + +int perf_event_mmap(int fd); +/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */ +int perf_event_poller(int fd, perf_event_print_fn output_fn); +#endif diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index 4acfdebf36fa..9de8b7cb4e6d 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -6,15 +6,21 @@ #include <stdlib.h> #define BUF_SIZE 256 -int main(void) + +int main(int argc, char *argv[]) { int fd = open("/dev/urandom", O_RDONLY); int i; char buf[BUF_SIZE]; + int count = 4; if (fd < 0) return 1; - for (i = 0; i < 4; ++i) + + if (argc == 2) + count = atoi(argv[1]); + + for (i = 0; i < count; ++i) read(fd, buf, BUF_SIZE); close(fd); diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 3fece06e9f64..f82dcc1f8841 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -143,10 +143,14 @@ void suspend(void) int err; struct itimerspec spec = {}; + if (getuid() != 0) + ksft_exit_skip("Please run the test as root - Exiting.\n"); + power_state_fd = open("/sys/power/state", O_RDWR); if (power_state_fd < 0) ksft_exit_fail_msg( - "open(\"/sys/power/state\") failed (is this test running as root?)\n"); + "open(\"/sys/power/state\") failed %s)\n", + strerror(errno)); timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0); if (timerfd < 0) diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile new file mode 100644 index 000000000000..f7a31392eb2f --- /dev/null +++ b/tools/testing/selftests/cgroup/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wall + +all: + +TEST_GEN_PROGS = test_memcontrol + +include ../lib.mk + +$(OUTPUT)/test_memcontrol: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c new file mode 100644 index 000000000000..1e9e3c470561 --- /dev/null +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <linux/limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "cgroup_util.h" + +static ssize_t read_text(const char *path, char *buf, size_t max_len) +{ + ssize_t len; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + len = read(fd, buf, max_len - 1); + if (len < 0) + goto out; + + buf[len] = 0; +out: + close(fd); + return len; +} + +static ssize_t write_text(const char *path, char *buf, ssize_t len) +{ + int fd; + + fd = open(path, O_WRONLY | O_APPEND); + if (fd < 0) + return fd; + + len = write(fd, buf, len); + if (len < 0) { + close(fd); + return len; + } + + close(fd); + + return len; +} + +char *cg_name(const char *root, const char *name) +{ + size_t len = strlen(root) + strlen(name) + 2; + char *ret = malloc(len); + + snprintf(ret, len, "%s/%s", root, name); + + return ret; +} + +char *cg_name_indexed(const char *root, const char *name, int index) +{ + size_t len = strlen(root) + strlen(name) + 10; + char *ret = malloc(len); + + snprintf(ret, len, "%s/%s_%d", root, name, index); + + return ret; +} + +int cg_read(const char *cgroup, const char *control, char *buf, size_t len) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/%s", cgroup, control); + + if (read_text(path, buf, len) >= 0) + return 0; + + return -1; +} + +int cg_read_strcmp(const char *cgroup, const char *control, + const char *expected) +{ + size_t size = strlen(expected) + 1; + char *buf; + + buf = malloc(size); + if (!buf) + return -1; + + if (cg_read(cgroup, control, buf, size)) + return -1; + + return strcmp(expected, buf); +} + +int cg_read_strstr(const char *cgroup, const char *control, const char *needle) +{ + char buf[PAGE_SIZE]; + + if (cg_read(cgroup, control, buf, sizeof(buf))) + return -1; + + return strstr(buf, needle) ? 0 : -1; +} + +long cg_read_long(const char *cgroup, const char *control) +{ + char buf[128]; + + if (cg_read(cgroup, control, buf, sizeof(buf))) + return -1; + + return atol(buf); +} + +long cg_read_key_long(const char *cgroup, const char *control, const char *key) +{ + char buf[PAGE_SIZE]; + char *ptr; + + if (cg_read(cgroup, control, buf, sizeof(buf))) + return -1; + + ptr = strstr(buf, key); + if (!ptr) + return -1; + + return atol(ptr + strlen(key)); +} + +int cg_write(const char *cgroup, const char *control, char *buf) +{ + char path[PATH_MAX]; + ssize_t len = strlen(buf); + + snprintf(path, sizeof(path), "%s/%s", cgroup, control); + + if (write_text(path, buf, len) == len) + return 0; + + return -1; +} + +int cg_find_unified_root(char *root, size_t len) +{ + char buf[10 * PAGE_SIZE]; + char *fs, *mount, *type; + const char delim[] = "\n\t "; + + if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) + return -1; + + /* + * Example: + * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 + */ + for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { + mount = strtok(NULL, delim); + type = strtok(NULL, delim); + strtok(NULL, delim); + strtok(NULL, delim); + strtok(NULL, delim); + + if (strcmp(fs, "cgroup") == 0 && + strcmp(type, "cgroup2") == 0) { + strncpy(root, mount, len); + return 0; + } + } + + return -1; +} + +int cg_create(const char *cgroup) +{ + return mkdir(cgroup, 0644); +} + +static int cg_killall(const char *cgroup) +{ + char buf[PAGE_SIZE]; + char *ptr = buf; + + if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) + return -1; + + while (ptr < buf + sizeof(buf)) { + int pid = strtol(ptr, &ptr, 10); + + if (pid == 0) + break; + if (*ptr) + ptr++; + else + break; + if (kill(pid, SIGKILL)) + return -1; + } + + return 0; +} + +int cg_destroy(const char *cgroup) +{ + int ret; + +retry: + ret = rmdir(cgroup); + if (ret && errno == EBUSY) { + ret = cg_killall(cgroup); + if (ret) + return ret; + usleep(100); + goto retry; + } + + if (ret && errno == ENOENT) + ret = 0; + + return ret; +} + +int cg_run(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg) +{ + int pid, retcode; + + pid = fork(); + if (pid < 0) { + return pid; + } else if (pid == 0) { + char buf[64]; + + snprintf(buf, sizeof(buf), "%d", getpid()); + if (cg_write(cgroup, "cgroup.procs", buf)) + exit(EXIT_FAILURE); + exit(fn(cgroup, arg)); + } else { + waitpid(pid, &retcode, 0); + if (WIFEXITED(retcode)) + return WEXITSTATUS(retcode); + else + return -1; + } +} + +int cg_run_nowait(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg) +{ + int pid; + + pid = fork(); + if (pid == 0) { + char buf[64]; + + snprintf(buf, sizeof(buf), "%d", getpid()); + if (cg_write(cgroup, "cgroup.procs", buf)) + exit(EXIT_FAILURE); + exit(fn(cgroup, arg)); + } + + return pid; +} + +int get_temp_fd(void) +{ + return open(".", O_TMPFILE | O_RDWR | O_EXCL); +} + +int alloc_pagecache(int fd, size_t size) +{ + char buf[PAGE_SIZE]; + struct stat st; + int i; + + if (fstat(fd, &st)) + goto cleanup; + + size += st.st_size; + + if (ftruncate(fd, size)) + goto cleanup; + + for (i = 0; i < size; i += sizeof(buf)) + read(fd, buf, sizeof(buf)); + + return 0; + +cleanup: + return -1; +} + +int alloc_anon(const char *cgroup, void *arg) +{ + size_t size = (unsigned long)arg; + char *buf, *ptr; + + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; + + free(buf); + return 0; +} + +int is_swap_enabled(void) +{ + char buf[PAGE_SIZE]; + const char delim[] = "\n"; + int cnt = 0; + char *line; + + if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) + return -1; + + for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) + cnt++; + + return cnt > 1; +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h new file mode 100644 index 000000000000..fe82a297d4e0 --- /dev/null +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <stdlib.h> + +#define PAGE_SIZE 4096 + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define MB(x) (x << 20) + +/* + * Checks if two given values differ by less than err% of their sum. + */ +static inline int values_close(long a, long b, int err) +{ + return abs(a - b) <= (a + b) / 100 * err; +} + +extern int cg_find_unified_root(char *root, size_t len); +extern char *cg_name(const char *root, const char *name); +extern char *cg_name_indexed(const char *root, const char *name, int index); +extern int cg_create(const char *cgroup); +extern int cg_destroy(const char *cgroup); +extern int cg_read(const char *cgroup, const char *control, + char *buf, size_t len); +extern int cg_read_strcmp(const char *cgroup, const char *control, + const char *expected); +extern int cg_read_strstr(const char *cgroup, const char *control, + const char *needle); +extern long cg_read_long(const char *cgroup, const char *control); +long cg_read_key_long(const char *cgroup, const char *control, const char *key); +extern int cg_write(const char *cgroup, const char *control, char *buf); +extern int cg_run(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg); +extern int cg_run_nowait(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg); +extern int get_temp_fd(void); +extern int alloc_pagecache(int fd, size_t size); +extern int alloc_anon(const char *cgroup, void *arg); +extern int is_swap_enabled(void); diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c new file mode 100644 index 000000000000..cf0bddc9d271 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -0,0 +1,1015 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netdb.h> +#include <errno.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +/* + * This test creates two nested cgroups with and without enabling + * the memory controller. + */ +static int test_memcg_subtree_control(const char *root) +{ + char *parent, *child, *parent2, *child2; + int ret = KSFT_FAIL; + char buf[PAGE_SIZE]; + + /* Create two nested cgroups with the memory controller enabled */ + parent = cg_name(root, "memcg_test_0"); + child = cg_name(root, "memcg_test_0/memcg_test_1"); + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_read_strstr(child, "cgroup.controllers", "memory")) + goto cleanup; + + /* Create two nested cgroups without enabling memory controller */ + parent2 = cg_name(root, "memcg_test_1"); + child2 = cg_name(root, "memcg_test_1/memcg_test_1"); + if (!parent2 || !child2) + goto cleanup; + + if (cg_create(parent2)) + goto cleanup; + + if (cg_create(child2)) + goto cleanup; + + if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) + goto cleanup; + + if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(child); + cg_destroy(parent); + free(parent); + free(child); + + cg_destroy(child2); + cg_destroy(parent2); + free(parent2); + free(child2); + + return ret; +} + +static int alloc_anon_50M_check(const char *cgroup, void *arg) +{ + size_t size = MB(50); + char *buf, *ptr; + long anon, current; + int ret = -1; + + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; + + current = cg_read_long(cgroup, "memory.current"); + if (current < size) + goto cleanup; + + if (!values_close(size, current, 3)) + goto cleanup; + + anon = cg_read_key_long(cgroup, "memory.stat", "anon "); + if (anon < 0) + goto cleanup; + + if (!values_close(anon, current, 3)) + goto cleanup; + + ret = 0; +cleanup: + free(buf); + return ret; +} + +static int alloc_pagecache_50M_check(const char *cgroup, void *arg) +{ + size_t size = MB(50); + int ret = -1; + long current, file; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + return -1; + + if (alloc_pagecache(fd, size)) + goto cleanup; + + current = cg_read_long(cgroup, "memory.current"); + if (current < size) + goto cleanup; + + file = cg_read_key_long(cgroup, "memory.stat", "file "); + if (file < 0) + goto cleanup; + + if (!values_close(file, current, 10)) + goto cleanup; + + ret = 0; + +cleanup: + close(fd); + return ret; +} + +/* + * This test create a memory cgroup, allocates + * some anonymous memory and some pagecache + * and check memory.current and some memory.stat values. + */ +static int test_memcg_current(const char *root) +{ + int ret = KSFT_FAIL; + long current; + char *memcg; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current != 0) + goto cleanup; + + if (cg_run(memcg, alloc_anon_50M_check, NULL)) + goto cleanup; + + if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +static int alloc_pagecache_50M(const char *cgroup, void *arg) +{ + int fd = (long)arg; + + return alloc_pagecache(fd, MB(50)); +} + +static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) +{ + int fd = (long)arg; + int ppid = getppid(); + + if (alloc_pagecache(fd, MB(50))) + return -1; + + while (getppid() == ppid) + sleep(1); + + return 0; +} + +/* + * First, this test creates the following hierarchy: + * A memory.min = 50M, memory.max = 200M + * A/B memory.min = 50M, memory.current = 50M + * A/B/C memory.min = 75M, memory.current = 50M + * A/B/D memory.min = 25M, memory.current = 50M + * A/B/E memory.min = 500M, memory.current = 0 + * A/B/F memory.min = 0, memory.current = 50M + * + * Usages are pagecache, but the test keeps a running + * process in every leaf cgroup. + * Then it creates A/G and creates a significant + * memory pressure in it. + * + * A/B memory.current ~= 50M + * A/B/C memory.current ~= 33M + * A/B/D memory.current ~= 17M + * A/B/E memory.current ~= 0 + * + * After that it tries to allocate more than there is + * unprotected memory in A available, and checks + * checks that memory.min protects pagecache even + * in this case. + */ +static int test_memcg_min(const char *root) +{ + int ret = KSFT_FAIL; + char *parent[3] = {NULL}; + char *children[4] = {NULL}; + long c[4]; + int i, attempts; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + parent[0] = cg_name(root, "memcg_test_0"); + if (!parent[0]) + goto cleanup; + + parent[1] = cg_name(parent[0], "memcg_test_1"); + if (!parent[1]) + goto cleanup; + + parent[2] = cg_name(parent[0], "memcg_test_2"); + if (!parent[2]) + goto cleanup; + + if (cg_create(parent[0])) + goto cleanup; + + if (cg_read_long(parent[0], "memory.min")) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_write(parent[0], "memory.max", "200M")) + goto cleanup; + + if (cg_write(parent[0], "memory.swap.max", "0")) + goto cleanup; + + if (cg_create(parent[1])) + goto cleanup; + + if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_create(parent[2])) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) { + children[i] = cg_name_indexed(parent[1], "child_memcg", i); + if (!children[i]) + goto cleanup; + + if (cg_create(children[i])) + goto cleanup; + + if (i == 2) + continue; + + cg_run_nowait(children[i], alloc_pagecache_50M_noexit, + (void *)(long)fd); + } + + if (cg_write(parent[0], "memory.min", "50M")) + goto cleanup; + if (cg_write(parent[1], "memory.min", "50M")) + goto cleanup; + if (cg_write(children[0], "memory.min", "75M")) + goto cleanup; + if (cg_write(children[1], "memory.min", "25M")) + goto cleanup; + if (cg_write(children[2], "memory.min", "500M")) + goto cleanup; + if (cg_write(children[3], "memory.min", "0")) + goto cleanup; + + attempts = 0; + while (!values_close(cg_read_long(parent[1], "memory.current"), + MB(150), 3)) { + if (attempts++ > 5) + break; + sleep(1); + } + + if (cg_run(parent[2], alloc_anon, (void *)MB(148))) + goto cleanup; + + if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) + c[i] = cg_read_long(children[i], "memory.current"); + + if (!values_close(c[0], MB(33), 10)) + goto cleanup; + + if (!values_close(c[1], MB(17), 10)) + goto cleanup; + + if (!values_close(c[2], 0, 1)) + goto cleanup; + + if (!cg_run(parent[2], alloc_anon, (void *)MB(170))) + goto cleanup; + + if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { + if (!children[i]) + continue; + + cg_destroy(children[i]); + free(children[i]); + } + + for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { + if (!parent[i]) + continue; + + cg_destroy(parent[i]); + free(parent[i]); + } + close(fd); + return ret; +} + +/* + * First, this test creates the following hierarchy: + * A memory.low = 50M, memory.max = 200M + * A/B memory.low = 50M, memory.current = 50M + * A/B/C memory.low = 75M, memory.current = 50M + * A/B/D memory.low = 25M, memory.current = 50M + * A/B/E memory.low = 500M, memory.current = 0 + * A/B/F memory.low = 0, memory.current = 50M + * + * Usages are pagecache. + * Then it creates A/G an creates a significant + * memory pressure in it. + * + * Then it checks actual memory usages and expects that: + * A/B memory.current ~= 50M + * A/B/ memory.current ~= 33M + * A/B/D memory.current ~= 17M + * A/B/E memory.current ~= 0 + * + * After that it tries to allocate more than there is + * unprotected memory in A available, + * and checks low and oom events in memory.events. + */ +static int test_memcg_low(const char *root) +{ + int ret = KSFT_FAIL; + char *parent[3] = {NULL}; + char *children[4] = {NULL}; + long low, oom; + long c[4]; + int i; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + parent[0] = cg_name(root, "memcg_test_0"); + if (!parent[0]) + goto cleanup; + + parent[1] = cg_name(parent[0], "memcg_test_1"); + if (!parent[1]) + goto cleanup; + + parent[2] = cg_name(parent[0], "memcg_test_2"); + if (!parent[2]) + goto cleanup; + + if (cg_create(parent[0])) + goto cleanup; + + if (cg_read_long(parent[0], "memory.low")) + goto cleanup; + + if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_write(parent[0], "memory.max", "200M")) + goto cleanup; + + if (cg_write(parent[0], "memory.swap.max", "0")) + goto cleanup; + + if (cg_create(parent[1])) + goto cleanup; + + if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_create(parent[2])) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) { + children[i] = cg_name_indexed(parent[1], "child_memcg", i); + if (!children[i]) + goto cleanup; + + if (cg_create(children[i])) + goto cleanup; + + if (i == 2) + continue; + + if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd)) + goto cleanup; + } + + if (cg_write(parent[0], "memory.low", "50M")) + goto cleanup; + if (cg_write(parent[1], "memory.low", "50M")) + goto cleanup; + if (cg_write(children[0], "memory.low", "75M")) + goto cleanup; + if (cg_write(children[1], "memory.low", "25M")) + goto cleanup; + if (cg_write(children[2], "memory.low", "500M")) + goto cleanup; + if (cg_write(children[3], "memory.low", "0")) + goto cleanup; + + if (cg_run(parent[2], alloc_anon, (void *)MB(148))) + goto cleanup; + + if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) + c[i] = cg_read_long(children[i], "memory.current"); + + if (!values_close(c[0], MB(33), 10)) + goto cleanup; + + if (!values_close(c[1], MB(17), 10)) + goto cleanup; + + if (!values_close(c[2], 0, 1)) + goto cleanup; + + if (cg_run(parent[2], alloc_anon, (void *)MB(166))) { + fprintf(stderr, + "memory.low prevents from allocating anon memory\n"); + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) { + oom = cg_read_key_long(children[i], "memory.events", "oom "); + low = cg_read_key_long(children[i], "memory.events", "low "); + + if (oom) + goto cleanup; + if (i < 2 && low <= 0) + goto cleanup; + if (i >= 2 && low) + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { + if (!children[i]) + continue; + + cg_destroy(children[i]); + free(children[i]); + } + + for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { + if (!parent[i]) + continue; + + cg_destroy(parent[i]); + free(parent[i]); + } + close(fd); + return ret; +} + +static int alloc_pagecache_max_30M(const char *cgroup, void *arg) +{ + size_t size = MB(50); + int ret = -1; + long current; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + return -1; + + if (alloc_pagecache(fd, size)) + goto cleanup; + + current = cg_read_long(cgroup, "memory.current"); + if (current <= MB(29) || current > MB(30)) + goto cleanup; + + ret = 0; + +cleanup: + close(fd); + return ret; + +} + +/* + * This test checks that memory.high limits the amount of + * memory which can be consumed by either anonymous memory + * or pagecache. + */ +static int test_memcg_high(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + long high; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_read_strcmp(memcg, "memory.high", "max\n")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(memcg, "memory.high", "30M")) + goto cleanup; + + if (cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) + goto cleanup; + + if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) + goto cleanup; + + high = cg_read_key_long(memcg, "memory.events", "high "); + if (high <= 0) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +/* + * This test checks that memory.max limits the amount of + * memory which can be consumed by either anonymous memory + * or pagecache. + */ +static int test_memcg_max(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + long current, max; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_read_strcmp(memcg, "memory.max", "max\n")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(memcg, "memory.max", "30M")) + goto cleanup; + + /* Should be killed by OOM killer */ + if (!cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current > MB(30) || !current) + goto cleanup; + + max = cg_read_key_long(memcg, "memory.events", "max "); + if (max <= 0) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) +{ + long mem_max = (long)arg; + size_t size = MB(50); + char *buf, *ptr; + long mem_current, swap_current; + int ret = -1; + + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; + + mem_current = cg_read_long(cgroup, "memory.current"); + if (!mem_current || !values_close(mem_current, mem_max, 3)) + goto cleanup; + + swap_current = cg_read_long(cgroup, "memory.swap.current"); + if (!swap_current || + !values_close(mem_current + swap_current, size, 3)) + goto cleanup; + + ret = 0; +cleanup: + free(buf); + return ret; +} + +/* + * This test checks that memory.swap.max limits the amount of + * anonymous memory which can be swapped out. + */ +static int test_memcg_swap_max(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + long max; + + if (!is_swap_enabled()) + return KSFT_SKIP; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_read_long(memcg, "memory.swap.current")) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_read_strcmp(memcg, "memory.max", "max\n")) + goto cleanup; + + if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "30M")) + goto cleanup; + + if (cg_write(memcg, "memory.max", "30M")) + goto cleanup; + + /* Should be killed by OOM killer */ + if (!cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) + goto cleanup; + + if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) + goto cleanup; + + max = cg_read_key_long(memcg, "memory.events", "max "); + if (max <= 0) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +/* + * This test disables swapping and tries to allocate anonymous memory + * up to OOM. Then it checks for oom and oom_kill events in + * memory.events. + */ +static int test_memcg_oom_events(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_write(memcg, "memory.max", "30M")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (!cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_read_strcmp(memcg, "cgroup.procs", "")) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +struct tcp_server_args { + unsigned short port; + int ctl[2]; +}; + +static int tcp_server(const char *cgroup, void *arg) +{ + struct tcp_server_args *srv_args = arg; + struct sockaddr_in6 saddr = { 0 }; + socklen_t slen = sizeof(saddr); + int sk, client_sk, ctl_fd, yes = 1, ret = -1; + + close(srv_args->ctl[0]); + ctl_fd = srv_args->ctl[1]; + + saddr.sin6_family = AF_INET6; + saddr.sin6_addr = in6addr_any; + saddr.sin6_port = htons(srv_args->port); + + sk = socket(AF_INET6, SOCK_STREAM, 0); + if (sk < 0) + return ret; + + if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) + goto cleanup; + + if (bind(sk, (struct sockaddr *)&saddr, slen)) { + write(ctl_fd, &errno, sizeof(errno)); + goto cleanup; + } + + if (listen(sk, 1)) + goto cleanup; + + ret = 0; + if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { + ret = -1; + goto cleanup; + } + + client_sk = accept(sk, NULL, NULL); + if (client_sk < 0) + goto cleanup; + + ret = -1; + for (;;) { + uint8_t buf[0x100000]; + + if (write(client_sk, buf, sizeof(buf)) <= 0) { + if (errno == ECONNRESET) + ret = 0; + break; + } + } + + close(client_sk); + +cleanup: + close(sk); + return ret; +} + +static int tcp_client(const char *cgroup, unsigned short port) +{ + const char server[] = "localhost"; + struct addrinfo *ai; + char servport[6]; + int retries = 0x10; /* nice round number */ + int sk, ret; + + snprintf(servport, sizeof(servport), "%hd", port); + ret = getaddrinfo(server, servport, NULL, &ai); + if (ret) + return ret; + + sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if (sk < 0) + goto free_ainfo; + + ret = connect(sk, ai->ai_addr, ai->ai_addrlen); + if (ret < 0) + goto close_sk; + + ret = KSFT_FAIL; + while (retries--) { + uint8_t buf[0x100000]; + long current, sock; + + if (read(sk, buf, sizeof(buf)) <= 0) + goto close_sk; + + current = cg_read_long(cgroup, "memory.current"); + sock = cg_read_key_long(cgroup, "memory.stat", "sock "); + + if (current < 0 || sock < 0) + goto close_sk; + + if (current < sock) + goto close_sk; + + if (values_close(current, sock, 10)) { + ret = KSFT_PASS; + break; + } + } + +close_sk: + close(sk); +free_ainfo: + freeaddrinfo(ai); + return ret; +} + +/* + * This test checks socket memory accounting. + * The test forks a TCP server listens on a random port between 1000 + * and 61000. Once it gets a client connection, it starts writing to + * its socket. + * The TCP client interleaves reads from the socket with check whether + * memory.current and memory.stat.sock are similar. + */ +static int test_memcg_sock(const char *root) +{ + int bind_retries = 5, ret = KSFT_FAIL, pid, err; + unsigned short port; + char *memcg; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + while (bind_retries--) { + struct tcp_server_args args; + + if (pipe(args.ctl)) + goto cleanup; + + port = args.port = 1000 + rand() % 60000; + + pid = cg_run_nowait(memcg, tcp_server, &args); + if (pid < 0) + goto cleanup; + + close(args.ctl[1]); + if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) + goto cleanup; + close(args.ctl[0]); + + if (!err) + break; + if (err != EADDRINUSE) + goto cleanup; + + waitpid(pid, NULL, 0); + } + + if (err == EADDRINUSE) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (tcp_client(memcg, port) != KSFT_PASS) + goto cleanup; + + waitpid(pid, &err, 0); + if (WEXITSTATUS(err)) + goto cleanup; + + if (cg_read_long(memcg, "memory.current") < 0) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.stat", "sock ")) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +#define T(x) { x, #x } +struct memcg_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_memcg_subtree_control), + T(test_memcg_current), + T(test_memcg_min), + T(test_memcg_low), + T(test_memcg_high), + T(test_memcg_max), + T(test_memcg_oom_events), + T(test_memcg_swap_max), + T(test_memcg_sock), +}; +#undef T + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + /* + * Check that memory controller is available: + * memory is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index f3a8933c1275..bab13dd025a6 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 prerequisite() { @@ -9,7 +11,7 @@ prerequisite() if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi taskset -p 01 $$ @@ -18,12 +20,12 @@ prerequisite() if [ ! -d "$SYSFS" ]; then echo $msg sysfs is not mounted >&2 - exit 0 + exit $ksft_skip fi if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then echo $msg cpu hotplug is not supported >&2 - exit 0 + exit $ksft_skip fi echo "CPU online/offline summary:" @@ -32,7 +34,7 @@ prerequisite() if [[ "$online_cpus" = "$online_max" ]]; then echo "$msg: since there is only one cpu: $online_cpus" - exit 0 + exit $ksft_skip fi echo -e "\t Cpus in online state: $online_cpus" @@ -237,12 +239,12 @@ prerequisite_extra() if [ ! -d "$DEBUGFS" ]; then echo $msg debugfs is not mounted >&2 - exit 0 + exit $ksft_skip fi if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then echo $msg cpu-notifier-error-inject module is not available >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index d83922de9d89..31f8c9a76c5f 100755 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -13,6 +13,9 @@ SYSFS= CPUROOT= CPUFREQROOT= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + helpme() { printf "Usage: $0 [-h] [-todg args] @@ -38,7 +41,7 @@ prerequisite() if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 2 + exit $ksft_skip fi taskset -p 01 $$ diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh new file mode 100755 index 000000000000..1893d0f59ad7 --- /dev/null +++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; } + +while getopts "h:b:p:" arg; do + case "${arg}" in + h) + usage + ;; + b) + busid=${OPTARG} + ;; + p) + tools_path=${OPTARG} + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + +if [ -z "${busid}" ]; then + usage +fi + +echo "Running USB over IP Testing on $busid"; + +test_end_msg="End of USB over IP Testing on $busid" + +if [ $UID != 0 ]; then + echo "Please run usbip_test as root [SKIP]" + echo $test_end_msg + exit $ksft_skip +fi + +echo "Load usbip_host module" +if ! /sbin/modprobe -q -n usbip_host; then + echo "usbip_test: module usbip_host is not found [SKIP]" + echo $test_end_msg + exit $ksft_skip +fi + +if /sbin/modprobe -q usbip_host; then + /sbin/modprobe -q -r test_bitmap + echo "usbip_test: module usbip_host is loaded [OK]" +else + echo "usbip_test: module usbip_host failed to load [FAIL]" + echo $test_end_msg + exit 1 +fi + +echo "Load vhci_hcd module" +if /sbin/modprobe -q vhci_hcd; then + /sbin/modprobe -q -r test_bitmap + echo "usbip_test: module vhci_hcd is loaded [OK]" +else + echo "usbip_test: module vhci_hcd failed to load [FAIL]" + echo $test_end_msg + exit 1 +fi +echo "==============================================================" + +cd $tools_path; + +if [ ! -f src/usbip ]; then + echo "Please build usbip tools" + echo $test_end_msg + exit $ksft_skip +fi + +echo "Expect to see export-able devices"; +src/usbip list -l; +echo "==============================================================" + +echo "Run lsusb to see all usb devices" +lsusb -t; +echo "==============================================================" + +src/usbipd -D; + +echo "Get exported devices from localhost - expect to see none"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "bind devices"; +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Run lsusb - bound devices should be under usbip_host control" +lsusb -t; +echo "==============================================================" + +echo "bind devices - expect already bound messages" +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Get exported devices from localhost - expect to see exported devices"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "unbind devices"; +src/usbip unbind -b $busid; +echo "==============================================================" + +echo "Run lsusb - bound devices should be rebound to original drivers" +lsusb -t; +echo "==============================================================" + +echo "unbind devices - expect no devices bound message"; +src/usbip unbind -b $busid; +echo "==============================================================" + +echo "Get exported devices from localhost - expect to see none"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - should fail with no devices" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "bind devices"; +src/usbip bind -b $busid; +echo "==============================================================" + +echo "List imported devices - expect to see exported devices"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - should work" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "List imported devices - expect to see imported devices"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - expect already imported messages" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "Un-import devices"; +src/usbip detach -p 00; +src/usbip detach -p 01; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Un-import devices - expect no devices to detach messages"; +src/usbip detach -p 00; +src/usbip detach -p 01; +echo "==============================================================" + +echo "Detach invalid port tests - expect invalid port error message"; +src/usbip detach -p 100; +echo "==============================================================" + +echo "Expect to see export-able devices"; +src/usbip list -l; +echo "==============================================================" + +echo "Remove usbip_host module"; +rmmod usbip_host; + +echo "Run lsusb - bound devices should be rebound to original drivers" +lsusb -t; +echo "==============================================================" + +echo "Run bind without usbip_host - expect fail" +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Run lsusb - devices that failed to bind aren't bound to any driver" +lsusb -t; +echo "==============================================================" + +echo "modprobe usbip_host - does it work?" +/sbin/modprobe usbip_host +echo "Should see -busid- is not in match_busid table... skip! dmesg" +echo "==============================================================" +dmesg | grep "is not in match_busid table" +echo "==============================================================" + +echo $test_end_msg diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index c6d5790575ae..a47029a799d2 100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -4,18 +4,21 @@ efivarfs_mount=/sys/firmware/efi/efivars test_guid=210be57c-9849-4fc7-a635-e6382d1aec27 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + check_prereqs() { local msg="skip all tests:" if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then echo $msg efivarfs is not mounted on $efivarfs_mount >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 67cd4597db2b..47cbf54d0801 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -20,6 +20,8 @@ #include <string.h> #include <unistd.h> +#include "../kselftest.h" + static char longpath[2 * PATH_MAX] = ""; static char *envp[] = { "IN_TEST=yes", NULL, NULL }; static char *argv[] = { "execveat", "99", NULL }; @@ -249,8 +251,8 @@ static int run_tests(void) errno = 0; execveat_(-1, NULL, NULL, NULL, 0); if (errno == ENOSYS) { - printf("[FAIL] ENOSYS calling execveat - no kernel support?\n"); - return 1; + ksft_exit_skip( + "ENOSYS calling execveat - no kernel support?\n"); } /* Change file position to confirm it doesn't affect anything */ diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 5c7d7001ad37..129880fb42d3 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +CFLAGS += -I../../../../usr/include/ TEST_GEN_PROGS := devpts_pts TEST_GEN_PROGS_EXTENDED := dnotify_test diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c index b9055e974289..b1fc9b916ace 100644 --- a/tools/testing/selftests/filesystems/devpts_pts.c +++ b/tools/testing/selftests/filesystems/devpts_pts.c @@ -8,9 +8,10 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <sys/ioctl.h> +#include <asm/ioctls.h> #include <sys/mount.h> #include <sys/wait.h> +#include "../kselftest.h" static bool terminal_dup2(int duplicate, int original) { @@ -125,10 +126,12 @@ static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents) if (errno == EINVAL) { fprintf(stderr, "TIOCGPTPEER is not supported. " "Skipping test.\n"); - fret = EXIT_SUCCESS; + fret = KSFT_SKIP; + } else { + fprintf(stderr, + "Failed to perform TIOCGPTPEER ioctl\n"); + fret = EXIT_FAILURE; } - - fprintf(stderr, "Failed to perform TIOCGPTPEER ioctl\n"); goto do_cleanup; } @@ -279,9 +282,9 @@ int main(int argc, char *argv[]) int ret; if (!isatty(STDIN_FILENO)) { - fprintf(stderr, "Standard input file desciptor is not attached " + fprintf(stderr, "Standard input file descriptor is not attached " "to a terminal. Skipping test\n"); - exit(EXIT_FAILURE); + exit(KSFT_SKIP); } ret = unshare(CLONE_NEWNS); diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index 8e2e34a2ca69..70d18be46af5 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -74,7 +74,7 @@ load_fw_custom() { if [ ! -e "$DIR"/trigger_custom_fallback ]; then echo "$0: custom fallback trigger not present, ignoring test" >&2 - return 1 + exit $ksft_skip fi local name="$1" @@ -107,7 +107,7 @@ load_fw_custom_cancel() { if [ ! -e "$DIR"/trigger_custom_fallback ]; then echo "$0: canceling custom fallback trigger not present, ignoring test" >&2 - return 1 + exit $ksft_skip fi local name="$1" diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index 6452d2129cd9..a4320c4b44dc 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -30,6 +30,7 @@ fi if [ ! -e "$DIR"/trigger_async_request ]; then echo "$0: empty filename: async trigger not present, ignoring test" >&2 + exit $ksft_skip else if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then echo "$0: empty filename should not succeed (async)" >&2 @@ -69,6 +70,7 @@ fi # Try the asynchronous version too if [ ! -e "$DIR"/trigger_async_request ]; then echo "$0: firmware loading: async trigger not present, ignoring test" >&2 + exit $ksft_skip else if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then echo "$0: could not trigger async request" >&2 @@ -89,7 +91,7 @@ test_config_present() { if [ ! -f $DIR/reset ]; then echo "Configuration triggers not present, ignoring test" - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 962d7f4ac627..6c5f1b2ffb74 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -9,11 +9,14 @@ DIR=/sys/devices/virtual/misc/test_firmware PROC_CONFIG="/proc/config.gz" TEST_DIR=$(dirname $0) +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + print_reqs_exit() { echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 - exit 1 + exit $ksft_skip } test_modprobe() @@ -88,7 +91,7 @@ verify_reqs() if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "usermode helper disabled so ignoring test" - exit 0 + exit $ksft_skip fi fi } diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 2a4f16fc9819..e4645d5e3126 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -15,14 +15,29 @@ reset_tracer() { # reset the current tracer echo nop > current_tracer } -reset_trigger() { # reset all current setting triggers - grep -v ^# events/*/*/trigger | +reset_trigger_file() { + # remove action triggers first + grep -H ':on[^:]*(' $@ | + while read line; do + cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["` + file=`echo $line | cut -f1 -d:` + echo "!$cmd" >> $file + done + grep -Hv ^# $@ | while read line; do - cmd=`echo $line | cut -f2- -d: | cut -f1 -d" "` - echo "!$cmd" > `echo $line | cut -f1 -d:` + cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["` + file=`echo $line | cut -f1 -d:` + echo "!$cmd" > $file done } +reset_trigger() { # reset all current setting triggers + if [ -d events/synthetic ]; then + reset_trigger_file events/synthetic/*/trigger + fi + reset_trigger_file events/*/*/trigger +} + reset_events_filter() { # reset all current setting filters grep -v ^none events/*/*/filter | while read line; do diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index 5ba73035e1d9..a0002563e9ee 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -24,6 +24,14 @@ arm*) ARG2=%r1 OFFS=4 ;; +ppc64*) + ARG2=%r4 + OFFS=8 +;; +ppc*) + ARG2=%r4 + OFFS=4 +;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 231bcd2c4eb5..d026ff4e562f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -34,6 +34,13 @@ arm*) GOODREG=%r0 BADREG=%ax ;; +ppc*) + GOODREG=%r3 + BADREG=%msr +;; +*) + echo "Please implement other architecture here" + exit_untested esac test_goodarg() # Good-args diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc new file mode 100644 index 000000000000..2acbfe2c0c0c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc @@ -0,0 +1,49 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: trace_marker trigger - test histogram trigger +# flags: instance + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -d events/ftrace/print ]; then + echo "event trace_marker is not supported" + exit_unsupported +fi + +if [ ! -f events/ftrace/print/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +if [ ! -f events/ftrace/print/hist ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +do_reset + +echo "Test histogram trace_marker tigger" + +echo 'hist:keys=common_pid' > events/ftrace/print/trigger +for i in `seq 1 10` ; do echo "hello" > trace_marker; done +grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \ + fail "hist trigger did not trigger correct times on trace_marker" + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc new file mode 100644 index 000000000000..6748e8cb42d0 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc @@ -0,0 +1,74 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: trace_marker trigger - test snapshot trigger +# flags: instance + +do_reset() { + reset_trigger + echo > set_event + echo 0 > snapshot + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f snapshot ]; then + echo "snapshot is not supported" + exit_unsupported +fi + +if [ ! -d events/ftrace/print ]; then + echo "event trace_marker is not supported" + exit_unsupported +fi + +if [ ! -f events/ftrace/print/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +test_trace() { + file=$1 + x=$2 + + cat $file | while read line; do + comment=`echo $line | sed -e 's/^#//'` + if [ "$line" != "$comment" ]; then + continue + fi + echo "testing $line for >$x<" + match=`echo $line | sed -e "s/>$x<//"` + if [ "$line" == "$match" ]; then + fail "$line does not have >$x< in it" + fi + let x=$x+2 + done +} + +do_reset + +echo "Test snapshot trace_marker tigger" + +echo 'snapshot' > events/ftrace/print/trigger + +# make sure the snapshot is allocated + +grep -q 'Snapshot is allocated' snapshot + +for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done + +test_trace trace 1 +test_trace snapshot 2 + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc new file mode 100644 index 000000000000..0a69c5d1cda8 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc @@ -0,0 +1,68 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: trace_marker trigger - test histogram with synthetic event against kernel event +# flags: + +do_reset() { + reset_trigger + echo > set_event + echo > synthetic_events + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic events not supported" + exit_unsupported +fi + +if [ ! -d events/ftrace/print ]; then + echo "event trace_marker is not supported" + exit_unsupported +fi + +if [ ! -d events/sched/sched_waking ]; then + echo "event sched_waking is not supported" + exit_unsupported +fi + +if [ ! -f events/ftrace/print/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +if [ ! -f events/ftrace/print/hist ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +do_reset + +echo "Test histogram kernel event to trace_marker latency histogram trigger" + +echo 'latency u64 lat' > synthetic_events +echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger +echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat)' > events/ftrace/print/trigger +echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger +sleep 1 +echo "hello" > trace_marker + +grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \ + fail "hist trigger did not trigger correct times on trace_marker" + +grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \ + fail "hist trigger did not trigger " + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc new file mode 100644 index 000000000000..3666dd6ab02a --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc @@ -0,0 +1,66 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: trace_marker trigger - test histogram with synthetic event +# flags: + +do_reset() { + reset_trigger + echo > set_event + echo > synthetic_events + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic events not supported" + exit_unsupported +fi + +if [ ! -d events/ftrace/print ]; then + echo "event trace_marker is not supported" + exit_unsupported +fi + +if [ ! -f events/ftrace/print/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +if [ ! -f events/ftrace/print/hist ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +do_reset + +echo "Test histogram trace_marker to trace_marker latency histogram trigger" + +echo 'latency u64 lat' > synthetic_events +echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger +echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger +echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger +echo -n "start" > trace_marker +echo -n "end" > trace_marker + +cnt=`grep 'hitcount: *1$' events/ftrace/print/hist | wc -l` + +if [ $cnt -ne 2 ]; then + fail "hist trace_marker trigger did not trigger correctly" +fi + +grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \ + fail "hist trigger did not trigger " + +do_reset + +exit 0 diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 8497a376ef9d..12631f0076a1 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -17,14 +17,6 @@ all: fi \ done -override define RUN_TESTS - @export KSFT_TAP_LEVEL=`echo 1`; - @echo "TAP version 13"; - @echo "selftests: futex"; - @echo "========================================"; - @cd $(OUTPUT); ./run.sh -endef - override define INSTALL_RULE mkdir -p $(INSTALL_PATH) install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) @@ -36,10 +28,6 @@ override define INSTALL_RULE done; endef -override define EMIT_TESTS - echo "./run.sh" -endef - override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh index 183fb932edbd..7f35b9880485 100755 --- a/tools/testing/selftests/gpio/gpio-mockup.sh +++ b/tools/testing/selftests/gpio/gpio-mockup.sh @@ -2,10 +2,11 @@ # SPDX-License-Identifier: GPL-2.0 #exit status -#1: run as non-root user +#1: Internal error #2: sysfs/debugfs not mount #3: insert module fail when gpio-mockup is a module. -#4: other reason. +#4: Skip test including run as non-root user. +#5: other reason. SYSFS= GPIO_SYSFS= @@ -15,6 +16,9 @@ GPIO_DEBUGFS= dev_type= module= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + usage() { echo "Usage:" @@ -34,7 +38,7 @@ prerequisite() msg="skip all tests:" if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 1 + exit $ksft_skip fi SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` if [ ! -d "$SYSFS" ]; then @@ -73,7 +77,7 @@ remove_module() die() { remove_module - exit 4 + exit 5 } test_chips() diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c index d21edea9c560..f6cd03a87493 100644 --- a/tools/testing/selftests/intel_pstate/aperf.c +++ b/tools/testing/selftests/intel_pstate/aperf.c @@ -9,6 +9,8 @@ #include <sys/timeb.h> #include <sched.h> #include <errno.h> +#include <string.h> +#include "../kselftest.h" void usage(char *name) { printf ("Usage: %s cpunum\n", name); @@ -41,8 +43,8 @@ int main(int argc, char **argv) { fd = open(msr_file_name, O_RDONLY); if (fd == -1) { - perror("Failed to open"); - return 1; + printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno)); + return KSFT_SKIP; } CPU_ZERO(&cpuset); diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index c670359becc6..e7008f614ad7 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -30,9 +30,18 @@ EVALUATE_ONLY=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then echo "$0 # Skipped: Test can only run on x86 architectures." - exit 0 + exit $ksft_skip +fi + +msg="skip all tests:" +if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip fi max_cpus=$(($(nproc)-1)) @@ -48,11 +57,12 @@ function run_test () { echo "sleeping for 5 seconds" sleep 5 - num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l) - if [ $num_freqs -le 2 ]; then - cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1 + grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs + num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ') + if [ $num_freqs -ge 2 ]; then + tail -n 1 /tmp/result.freqs > /tmp/result.$1 else - cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1 + cp /tmp/result.freqs /tmp/result.$1 fi ./msr 0 >> /tmp/result.$1 @@ -82,32 +92,37 @@ _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ') max_freq=$(($_max_freq / 1000)) -for freq in `seq $max_freq -100 $min_freq` +[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq` do echo "Setting maximum frequency to $freq" cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null - [ $EVALUATE_ONLY -eq 0 ] && run_test $freq + run_test $freq done -echo "==============================================================================" +[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null +echo "========================================================================" echo "The marketing frequency of the cpu is $mkt_freq MHz" echo "The maximum frequency of the cpu is $max_freq MHz" echo "The minimum frequency of the cpu is $min_freq MHz" -cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null - # make a pretty table -echo "Target Actual Difference MSR(0x199) max_perf_pct" +echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab for freq in `seq $max_freq -100 $min_freq` do result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ') msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ') max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' ) - if [ $result_freq -eq $freq ]; then - echo " $freq $result_freq 0 $msr $(($max_perf_pct*3300))" - else - echo " $freq $result_freq $(($result_freq-$freq)) $msr $(($max_perf_pct*$max_freq))" - fi + cat >> /tmp/result.tab << EOF +$freq +$result_freq +$((result_freq - freq)) +$msr +$((max_perf_pct * max_freq)) +EOF done + +# print the table +pr -aTt -5 < /tmp/result.tab + exit 0 diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index ee9382bdfadc..dac927e82336 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -196,10 +196,9 @@ int main(int argc, char **argv) int msg, pid, err; struct msgque_data msgque; - if (getuid() != 0) { - printf("Please run the test as root - Exiting.\n"); - return ksft_exit_fail(); - } + if (getuid() != 0) + return ksft_exit_skip( + "Please run the test as root - Exiting.\n"); msgque.key = ftok(argv[0], 822155650); if (msgque.key == -1) { diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 7956ea3be667..0a76314b4414 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -62,13 +62,16 @@ ALL_TESTS="$ALL_TESTS 0007:5:1" ALL_TESTS="$ALL_TESTS 0008:150:1" ALL_TESTS="$ALL_TESTS 0009:150:1" +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + test_modprobe() { if [ ! -d $DIR ]; then echo "$0: $DIR not present" >&2 echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 - exit 1 + exit $ksft_skip fi } @@ -105,12 +108,12 @@ test_reqs() { if ! which modprobe 2> /dev/null > /dev/null; then echo "$0: You need modprobe installed" >&2 - exit 1 + exit $ksft_skip fi if ! which kmod 2> /dev/null > /dev/null; then echo "$0: You need kmod installed" >&2 - exit 1 + exit $ksft_skip fi # kmod 19 has a bad bug where it returns 0 when modprobe @@ -124,13 +127,13 @@ test_reqs() echo "$0: You need at least kmod 20" >&2 echo "kmod <= 19 is buggy, for details see:" >&2 echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2 - exit 1 + exit $ksft_skip fi uid=$(id -u) if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 1b9d8ecdebce..15e6b75fc3a5 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -20,7 +20,7 @@ #define KSFT_XFAIL 2 #define KSFT_XPASS 3 /* Treat skip as pass */ -#define KSFT_SKIP KSFT_PASS +#define KSFT_SKIP 4 /* counters */ struct ksft_count { diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore new file mode 100644 index 000000000000..63fc1ab9248f --- /dev/null +++ b/tools/testing/selftests/kvm/.gitignore @@ -0,0 +1,3 @@ +set_sregs_test +sync_regs_test +vmx_tsc_adjust_test diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index c9f5b7d4ce38..cd01144d27c8 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -13,6 +13,8 @@ #include <execinfo.h> #include <sys/syscall.h> +#include "../../kselftest.h" + /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); static void test_dump_stack(void) @@ -70,8 +72,9 @@ test_assert(bool exp, const char *exp_str, fprintf(stderr, "==== Test Assertion Failure ====\n" " %s:%u: %s\n" - " pid=%d tid=%d\n", - file, line, exp_str, getpid(), gettid()); + " pid=%d tid=%d - %s\n", + file, line, exp_str, getpid(), gettid(), + strerror(errno)); test_dump_stack(); if (fmt) { fputs(" ", stderr); @@ -80,6 +83,8 @@ test_assert(bool exp, const char *exp_str, } va_end(ap); + if (errno == EACCES) + ksft_exit_skip("Access denied - Exiting.\n"); exit(254); } diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c index aaa633263b2c..d7cb7944a42e 100644 --- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -28,6 +28,8 @@ #include <string.h> #include <sys/ioctl.h> +#include "../kselftest.h" + #ifndef MSR_IA32_TSC_ADJUST #define MSR_IA32_TSC_ADJUST 0x3b #endif diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index c1b1a4dc6a96..17ab36605a8e 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -19,25 +19,43 @@ TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) .ONESHELL: +define RUN_TEST_PRINT_RESULT + TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \ + echo $$TEST_HDR_MSG; \ + echo "========================================"; \ + if [ ! -x $$TEST ]; then \ + echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\ + echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \ + else \ + cd `dirname $$TEST` > /dev/null; \ + if [ "X$(summary)" != "X" ]; then \ + (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \ + echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \ + (if [ $$? -eq $$skip ]; then \ + echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \ + else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \ + fi;) \ + else \ + (./$$BASENAME_TEST && \ + echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \ + (if [ $$? -eq $$skip ]; then \ + echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \ + else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \ + fi;) \ + fi; \ + cd - > /dev/null; \ + fi; +endef + define RUN_TESTS @export KSFT_TAP_LEVEL=`echo 1`; \ test_num=`echo 0`; \ + skip=`echo 4`; \ echo "TAP version 13"; \ for TEST in $(1); do \ BASENAME_TEST=`basename $$TEST`; \ test_num=`echo $$test_num+1 | bc`; \ - echo "selftests: $$BASENAME_TEST"; \ - echo "========================================"; \ - if [ ! -x $$TEST ]; then \ - echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\ - echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \ - else \ - if [ "X$(summary)" != "X" ]; then \ - cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ - else \ - cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ - fi; \ - fi; \ + $(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip)) \ done; endef @@ -76,9 +94,18 @@ else endif define EMIT_TESTS - @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ + @test_num=`echo 0`; \ + for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ + test_num=`echo $$test_num+1 | bc`; \ + TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \ + echo "echo $$TEST_HDR_MSG"; \ + if [ ! -x $$TEST ]; then \ + echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\""; \ + echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \ + else + echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \ + fi; \ done; endef @@ -106,6 +133,9 @@ COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) endif +# Selftest makefiles can override those targets by setting +# OVERRIDE_TARGETS = 1. +ifeq ($(OVERRIDE_TARGETS),) $(OUTPUT)/%:%.c $(LINK.c) $^ $(LDLIBS) -o $@ @@ -114,5 +144,6 @@ $(OUTPUT)/%.o:%.S $(OUTPUT)/%:%.S $(LINK.S) $^ $(LDLIBS) -o $@ +endif .PHONY: run_tests all clean install emit_tests diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index 08360060ab14..70d5711e3ac8 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -3,6 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh bitmap.sh +TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh index 4dee4d2a8bbe..5a90006d1aea 100755 --- a/tools/testing/selftests/lib/bitmap.sh +++ b/tools/testing/selftests/lib/bitmap.sh @@ -1,9 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # Runs bitmap infrastructure tests using test_bitmap kernel module if ! /sbin/modprobe -q -n test_bitmap; then - echo "bitmap: [SKIP]" - exit 77 + echo "bitmap: module test_bitmap is not found [SKIP]" + exit $ksft_skip fi if /sbin/modprobe -q test_bitmap; then diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh index b363994e5e11..78e7483c8d60 100755 --- a/tools/testing/selftests/lib/prime_numbers.sh +++ b/tools/testing/selftests/lib/prime_numbers.sh @@ -2,9 +2,12 @@ # SPDX-License-Identifier: GPL-2.0 # Checks fast/slow prime_number generation for inconsistencies -if ! /sbin/modprobe -q -r prime_numbers; then - echo "prime_numbers: [SKIP]" - exit 77 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n prime_numbers; then + echo "prime_numbers: module prime_numbers is not found [SKIP]" + exit $ksft_skip fi if /sbin/modprobe -q prime_numbers selftest=65536; then diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh index 0c37377fd7d4..45a23e2d64ad 100755 --- a/tools/testing/selftests/lib/printf.sh +++ b/tools/testing/selftests/lib/printf.sh @@ -1,9 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # Runs printf infrastructure using test_printf kernel module + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + if ! /sbin/modprobe -q -n test_printf; then - echo "printf: [SKIP]" - exit 77 + echo "printf: module test_printf is not found [SKIP]" + exit $ksft_skip fi if /sbin/modprobe -q test_printf; then diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile new file mode 100644 index 000000000000..6e7761ab3536 --- /dev/null +++ b/tools/testing/selftests/locking/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for locking/ww_mutx selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := ww_mutex.sh + +include ../lib.mk diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh index 2c3d6b1878c2..91e4ac7566af 100644..100755 --- a/tools/testing/selftests/locking/ww_mutex.sh +++ b/tools/testing/selftests/locking/ww_mutex.sh @@ -1,6 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # Runs API tests for struct ww_mutex (Wait/Wound mutexes) +if ! /sbin/modprobe -q -n test-ww_mutex; then + echo "ww_mutex: module test-ww_mutex is not found [SKIP]" + exit $ksft_skip +fi if /sbin/modprobe -q test-ww_mutex; then /sbin/modprobe -q -r test-ww_mutex diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile index c82cec2497de..60826d7d37d4 100644 --- a/tools/testing/selftests/media_tests/Makefile +++ b/tools/testing/selftests/media_tests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +# +CFLAGS += -I../ -I../../../../usr/include/ TEST_GEN_PROGS := media_device_test media_device_open video_device_test -all: $(TEST_GEN_PROGS) include ../lib.mk diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c index a5ce5434bafd..93183a37b133 100644 --- a/tools/testing/selftests/media_tests/media_device_open.c +++ b/tools/testing/selftests/media_tests/media_device_open.c @@ -34,6 +34,8 @@ #include <sys/stat.h> #include <linux/media.h> +#include "../kselftest.h" + int main(int argc, char **argv) { int opt; @@ -61,10 +63,8 @@ int main(int argc, char **argv) } } - if (getuid() != 0) { - printf("Please run the test as root - Exiting.\n"); - exit(-1); - } + if (getuid() != 0) + ksft_exit_skip("Please run the test as root - Exiting.\n"); /* Open Media device and keep it open */ fd = open(media_device, O_RDWR); diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c index 421a367e4bb3..4b9953359e40 100644 --- a/tools/testing/selftests/media_tests/media_device_test.c +++ b/tools/testing/selftests/media_tests/media_device_test.c @@ -39,6 +39,8 @@ #include <time.h> #include <linux/media.h> +#include "../kselftest.h" + int main(int argc, char **argv) { int opt; @@ -66,10 +68,8 @@ int main(int argc, char **argv) } } - if (getuid() != 0) { - printf("Please run the test as root - Exiting.\n"); - exit(-1); - } + if (getuid() != 0) + ksft_exit_skip("Please run the test as root - Exiting.\n"); /* Generate random number of interations */ srand((unsigned int) time(NULL)); @@ -88,7 +88,7 @@ int main(int argc, char **argv) "other Oops in the dmesg. Enable KaSan kernel\n" "config option for use-after-free error detection.\n\n"); - printf("Running test for %d iternations\n", count); + printf("Running test for %d iterations\n", count); while (count > 0) { ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi); diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index 22bffd55a523..6793f8ecc8e7 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -293,10 +293,9 @@ static int test_membarrier_query(void) } ksft_exit_fail_msg("sys_membarrier() failed\n"); } - if (!(ret & MEMBARRIER_CMD_GLOBAL)) { - ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n"); - ksft_exit_fail_msg("sys_membarrier is not supported.\n"); - } + if (!(ret & MEMBARRIER_CMD_GLOBAL)) + ksft_exit_skip( + "sys_membarrier unsupported: CMD_GLOBAL not found.\n"); ksft_test_result_pass("sys_membarrier available\n"); return 0; diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 0862e6f47a38..53a848109f7b 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -4,9 +4,9 @@ CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ -TEST_PROGS := run_tests.sh -TEST_FILES := run_fuse_test.sh -TEST_GEN_FILES := memfd_test fuse_mnt fuse_test +TEST_GEN_PROGS := memfd_test +TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh +TEST_GEN_FILES := fuse_mnt fuse_test fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh index c2d41ed81b24..fb633eeb0290 100755 --- a/tools/testing/selftests/memfd/run_tests.sh +++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh @@ -1,11 +1,8 @@ #!/bin/bash # please run as root -# -# Normal tests requiring no special resources -# -./run_fuse_test.sh -./memfd_test +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 # # To test memfd_create with hugetlbfs, there needs to be hpages_test @@ -29,12 +26,13 @@ if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` hpages_needed=`expr $hpages_test - $freepgs` + if [ $UID != 0 ]; then + echo "Please run memfd with hugetlbfs test as root" + exit $ksft_skip + fi + echo 3 > /proc/sys/vm/drop_caches echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages - if [ $? -ne 0 ]; then - echo "Please run this test as root" - exit 1 - fi while read name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs=$size @@ -53,7 +51,7 @@ if [ $freepgs -lt $hpages_test ]; then fi printf "Not enough huge pages available (%d < %d)\n" \ $freepgs $needpgs - exit 1 + exit $ksft_skip fi # diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index 686da510f989..e0a625e34f40 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -4,11 +4,8 @@ all: include ../lib.mk TEST_PROGS := mem-on-off-test.sh -override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" - -override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))" run_full_test: - @/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]" + @/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index ae2c790d0880..b37585e6aa38 100755 --- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh @@ -3,30 +3,33 @@ SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + prerequisite() { msg="skip all tests:" if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` if [ ! -d "$SYSFS" ]; then echo $msg sysfs is not mounted >&2 - exit 0 + exit $ksft_skip fi if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then echo $msg memory hotplug is not supported >&2 - exit 0 + exit $ksft_skip fi if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then echo $msg no hot-pluggable memory >&2 - exit 0 + exit $ksft_skip fi } @@ -133,7 +136,8 @@ offline_memory_expect_fail() error=-12 priority=0 -ratio=10 +# Run with default of ratio=2 for Kselftest run +ratio=2 retval=0 while getopts e:hp:r: opt; do diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index e094f71c6dbc..026890744215 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -3,15 +3,7 @@ CFLAGS = -Wall \ -O2 -TEST_GEN_PROGS := unprivileged-remount-test +TEST_PROGS := run_tests.sh +TEST_GEN_FILES := unprivileged-remount-test include ../lib.mk - -override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \ - then \ - ./unprivileged-remount-test ; \ - else \ - echo "WARN: No /proc/self/uid_map exist, test skipped." ; \ - fi -override EMIT_TESTS := echo "$(RUN_TESTS)" - diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh new file mode 100755 index 000000000000..4ab8f507dcba --- /dev/null +++ b/tools/testing/selftests/mount/run_tests.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Run mount selftests +if [ -f /proc/self/uid_map ] ; then + ./unprivileged-remount-test ; +else + echo "WARN: No /proc/self/uid_map exist, test skipped." ; + exit $ksft_skip +fi diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 743d3f9e5918..8a58055fc1f5 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -1,17 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O2 LDLIBS = -lrt -lpthread -lpopt + TEST_GEN_PROGS := mq_open_tests mq_perf_tests include ../lib.mk - -override define RUN_TESTS - @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" - @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" -endef - -override define EMIT_TESTS - echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" - echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" -endef - diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c index e0a74bd207a5..9403ac01ba11 100644 --- a/tools/testing/selftests/mqueue/mq_open_tests.c +++ b/tools/testing/selftests/mqueue/mq_open_tests.c @@ -33,6 +33,8 @@ #include <mqueue.h> #include <error.h> +#include "../kselftest.h" + static char *usage = "Usage:\n" " %s path\n" @@ -53,6 +55,7 @@ int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize; int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize; FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize; char *queue_path; +char *default_queue_path = "/test1"; mqd_t queue = -1; static inline void __set(FILE *stream, int value, char *err_msg); @@ -238,35 +241,33 @@ int main(int argc, char *argv[]) struct mq_attr attr, result; if (argc != 2) { - fprintf(stderr, "Must pass a valid queue name\n\n"); - fprintf(stderr, usage, argv[0]); - exit(1); - } + printf("Using Default queue path - %s\n", default_queue_path); + queue_path = default_queue_path; + } else { /* * Although we can create a msg queue with a non-absolute path name, * unlink will fail. So, if the name doesn't start with a /, add one * when we save it. */ - if (*argv[1] == '/') - queue_path = strdup(argv[1]); - else { - queue_path = malloc(strlen(argv[1]) + 2); - if (!queue_path) { - perror("malloc()"); - exit(1); + if (*argv[1] == '/') + queue_path = strdup(argv[1]); + else { + queue_path = malloc(strlen(argv[1]) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, argv[1]); } - queue_path[0] = '/'; - queue_path[1] = 0; - strcat(queue_path, argv[1]); } - if (getuid() != 0) { - fprintf(stderr, "Not running as root, but almost all tests " + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " "require root in order to modify\nsystem settings. " "Exiting.\n"); - exit(1); - } /* Find out what files there are for us to make tweaks in */ def_msgs = fopen(DEF_MSGS, "r+"); diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 8188f72de93c..b019e0b8221c 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -39,6 +39,8 @@ #include <popt.h> #include <error.h> +#include "../kselftest.h" + static char *usage = "Usage:\n" " %s [-c #[,#..] -f] path\n" @@ -626,12 +628,10 @@ int main(int argc, char *argv[]) cpus_to_pin[0] = cpus_online - 1; } - if (getuid() != 0) { - fprintf(stderr, "Not running as root, but almost all tests " + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " "require root in order to modify\nsystem settings. " "Exiting.\n"); - exit(1); - } max_msgs = fopen(MAX_MSGS, "r+"); max_msgsize = fopen(MAX_MSGSIZE, "r+"); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c612d6e38c62..1a0ac3a29ec5 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,9 +1,15 @@ msg_zerocopy socket psock_fanout +psock_snd psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack reuseaddr_conflict +tcp_mmap +udpgso +udpgso_bench_rx +udpgso_bench_tx +tcp_inq diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3ff81a478dbe..663e11e85727 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,13 +5,18 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh +TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy +TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd +TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread +$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 7ba089b33e8b..cd3a2f1545b5 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -12,3 +12,5 @@ CONFIG_NET_IPVTI=y CONFIG_INET6_XFRM_MODE_TUNNEL=y CONFIG_IPV6_VTI=y CONFIG_DUMMY=y +CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=y diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh new file mode 100755 index 000000000000..d4cfb6a7a086 --- /dev/null +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -0,0 +1,248 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB rules API + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +IP="ip -netns testns" + +RTABLE=100 +GW_IP4=192.51.100.2 +SRC_IP=192.51.100.3 +GW_IP6=2001:db8:1::2 +SRC_IP6=2001:db8:1::3 + +DEV_ADDR=192.51.100.1 +DEV=dummy0 + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-50s [ OK ]\n" "${msg}" + else + nfail=$((nfail+1)) + printf "\n TEST: %-50s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +log_section() +{ + echo + echo "######################################################################" + echo "TEST SECTION: $*" + echo "######################################################################" +} + +setup() +{ + set -e + ip netns add testns + $IP link set dev lo up + + $IP link add dummy0 type dummy + $IP link set dev dummy0 up + $IP address add 198.51.100.1/24 dev dummy0 + $IP -6 address add 2001:db8:1::1/64 dev dummy0 + + set +e +} + +cleanup() +{ + $IP link del dev dummy0 &> /dev/null + ip netns del testns +} + +fib_check_iproute_support() +{ + ip rule help 2>&1 | grep -q $1 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing $1 match" + return 1 + fi + + ip route get help 2>&1 | grep -q $2 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 get route too old, missing $2 match" + return 1 + fi + + return 0 +} + +fib_rule6_del() +{ + $IP -6 rule del $1 + log_test $? 0 "rule6 del $1" +} + +fib_rule6_del_by_pref() +{ + pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP -6 rule del pref $pref +} + +fib_rule6_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP -6 rule add $match table $RTABLE + $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule6 check: $1" + + fib_rule6_del_by_pref "$match" + log_test $? 0 "rule6 del by pref: $match" +} + +fib_rule6_test() +{ + # setup the fib rule redirect route + $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink + + match="oif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP6 iif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +fib_rule4_del() +{ + $IP rule del $1 + log_test $? 0 "del $1" +} + +fib_rule4_del_by_pref() +{ + pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP rule del pref $pref +} + +fib_rule4_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP rule add $match table $RTABLE + $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule4 check: $1" + + fib_rule4_del_by_pref "$match" + log_test $? 0 "rule4 del by pref: $match" +} + +fib_rule4_test() +{ + # setup the fib rule redirect route + $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink + + match="oif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP iif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +run_fibrule_tests() +{ + log_section "IPv4 fib rule" + fib_rule4_test + log_section "IPv6 fib rule" + fib_rule6_test +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +# start clean +cleanup &> /dev/null +setup +run_fibrule_tests +cleanup + +exit $ret diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 9164e60d4b66..0f45633bd634 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -5,9 +5,14 @@ # different events. ret=0 - -VERBOSE=${VERBOSE:=0} -PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# all tests in this script. Can be overridden with -t option +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no IP="ip -netns testns" log_test() @@ -18,8 +23,10 @@ log_test() if [ ${rc} -eq ${expected} ]; then printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) else ret=1 + nfail=$((nfail+1)) printf " TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo @@ -28,6 +35,13 @@ log_test() [ "$a" = "q" ] && exit 1 fi fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi } setup() @@ -563,39 +577,822 @@ fib_nexthop_test() } ################################################################################ -# +# Tests on route add and replace + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +# add route for a prefix, flushing any existing routes first +# expected to be the first step of a test +add_route6() +{ + local pfx="$1" + local nh="$2" + local out + + if [ "$VERBOSE" = "1" ]; then + echo + echo " ##################################################" + echo + fi + + run_cmd "$IP -6 ro flush ${pfx}" + [ $? -ne 0 ] && exit 1 + + out=$($IP -6 ro ls match ${pfx}) + if [ -n "$out" ]; then + echo "Failed to flush routes for prefix used for tests." + exit 1 + fi + + run_cmd "$IP -6 ro add ${pfx} ${nh}" + if [ $? -ne 0 ]; then + echo "Failed to add initial route for test." + exit 1 + fi +} + +# add initial route - used in replace route tests +add_initial_route6() +{ + add_route6 "2001:db8:104::/64" "$1" +} + +check_route6() +{ + local pfx="2001:db8:104::/64" + local expected="$1" + local out + local rc=0 + + out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + +route_cleanup() +{ + $IP li del red 2>/dev/null + $IP li del dummy1 2>/dev/null + $IP li del veth1 2>/dev/null + $IP li del veth3 2>/dev/null + + cleanup &> /dev/null +} + +route_setup() +{ + route_cleanup + setup + + [ "${VERBOSE}" = "1" ] && set -x + set -e + + $IP li add red up type vrf table 101 + $IP li add veth1 type veth peer name veth2 + $IP li add veth3 type veth peer name veth4 + + $IP li set veth1 up + $IP li set veth3 up + $IP li set veth2 vrf red up + $IP li set veth4 vrf red up + $IP li add dummy1 type dummy + $IP li set dummy1 vrf red up + + $IP -6 addr add 2001:db8:101::1/64 dev veth1 + $IP -6 addr add 2001:db8:101::2/64 dev veth2 + $IP -6 addr add 2001:db8:103::1/64 dev veth3 + $IP -6 addr add 2001:db8:103::2/64 dev veth4 + $IP -6 addr add 2001:db8:104::1/64 dev dummy1 + + $IP addr add 172.16.101.1/24 dev veth1 + $IP addr add 172.16.101.2/24 dev veth2 + $IP addr add 172.16.103.1/24 dev veth3 + $IP addr add 172.16.103.2/24 dev veth4 + $IP addr add 172.16.104.1/24 dev dummy1 + + set +ex +} + +# assumption is that basic add of a single path route works +# otherwise just adding an address on an interface is broken +ipv6_rt_add() +{ + local rc + + echo + echo "IPv6 route add / append tests" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2" + log_test $? 2 "Attempt to add duplicate route - gw" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3" + log_test $? 2 "Attempt to add duplicate route - dev only" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64" + log_test $? 2 "Attempt to add duplicate route - reject route" + + # route append with same prefix adds a new route + # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Append nexthop to existing route - gw" + + # insert mpath directly + add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Add multipath route" + + add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + log_test $? 2 "Attempt to add duplicate multipath route" + + # insert of a second route without append but different metric + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256" + rc=$? + fi + log_test $rc 0 "Route add with different metrics" + + run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024" + rc=$? + fi + log_test $rc 0 "Route delete with metric" +} + +ipv6_rt_replace_single() +{ + # single path with single path + # + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Single path with single path" + + # single path with multipath + # + add_initial_route6 "nexthop via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Single path with multipath" + + # single path with single path using MULTIPATH attribute + # + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Single path with single path via multipath attribute" + + # route replace fails - invalid nexthop + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2" + if [ $? -eq 0 ]; then + # previous command is expected to fail so if it returns 0 + # that means the test failed. + log_test 0 1 "Invalid nexthop" + else + check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024" + log_test $? 0 "Invalid nexthop" + fi + + # replace non-existent route + # - note use of change versus replace since ip adds NLM_F_CREATE + # for replace + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2" + log_test $? 2 "Single path - replace of non-existent route" +} + +ipv6_rt_replace_mpath() +{ + # multipath with multipath + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1" + log_test $? 0 "Multipath with multipath" + + # multipath with single + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3" + check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" + log_test $? 0 "Multipath with single path" + + # multipath with single + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3" + check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" + log_test $? 0 "Multipath with single path via multipath attribute" + + # route replace fails - invalid nexthop 1 + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid first nexthop" + + # route replace fails - invalid nexthop 2 + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid second nexthop" + + # multipath non-existent route + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3" + log_test $? 2 "Multipath - replace of non-existent route" +} + +ipv6_rt_replace() +{ + echo + echo "IPv6 route replace tests" + + ipv6_rt_replace_single + ipv6_rt_replace_mpath +} + +ipv6_route_test() +{ + route_setup + + ipv6_rt_add + ipv6_rt_replace + + route_cleanup +} + +ip_addr_metric_check() +{ + ip addr help 2>&1 | grep -q metric + if [ $? -ne 0 ]; then + echo "iproute2 command does not support metric for addresses. Skipping test" + return 1 + fi + + return 0 +} + +ipv6_addr_metric_test() +{ + local rc + + echo + echo "IPv6 prefix route tests" + + ip_addr_metric_check || return 1 + + setup + + set -e + $IP li add dummy1 type dummy + $IP li add dummy2 type dummy + $IP li set dummy1 up + $IP li set dummy2 up + + # default entry is metric 256 + run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64" + set +e + + check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256" + log_test $? 0 "Default metric" + + set -e + run_cmd "$IP -6 addr flush dev dummy1" + run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257" + set +e + + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257" + log_test $? 0 "User specified metric on first device" + + set -e + run_cmd "$IP -6 addr flush dev dummy2" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258" + set +e + + check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258" + log_test $? 0 "User specified metric on second device" -fib_test() + run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258" + rc=$? + fi + log_test $rc 0 "Delete of address on first device" + + run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259" + rc=$? + fi + log_test $rc 0 "Modify metric of address" + + # verify prefix route removed on down + run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "$IP li set dev dummy2 down" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "" + rc=$? + fi + log_test $rc 0 "Prefix route removed on link down" + + # verify prefix route re-inserted with assigned metric + run_cmd "$IP li set dev dummy2 up" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259" + rc=$? + fi + log_test $rc 0 "Prefix route with metric on link up" + + $IP li del dummy1 + $IP li del dummy2 + cleanup +} + +# add route for a prefix, flushing any existing routes first +# expected to be the first step of a test +add_route() +{ + local pfx="$1" + local nh="$2" + local out + + if [ "$VERBOSE" = "1" ]; then + echo + echo " ##################################################" + echo + fi + + run_cmd "$IP ro flush ${pfx}" + [ $? -ne 0 ] && exit 1 + + out=$($IP ro ls match ${pfx}) + if [ -n "$out" ]; then + echo "Failed to flush routes for prefix used for tests." + exit 1 + fi + + run_cmd "$IP ro add ${pfx} ${nh}" + if [ $? -ne 0 ]; then + echo "Failed to add initial route for test." + exit 1 + fi +} + +# add initial route - used in replace route tests +add_initial_route() { - if [ -n "$TEST" ]; then - eval $TEST + add_route "172.16.104.0/24" "$1" +} + +check_route() +{ + local pfx="172.16.104.0/24" + local expected="$1" + local out + local rc=0 + + out=$($IP ro ls match ${pfx}) + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + +# assumption is that basic add of a single path route works +# otherwise just adding an address on an interface is broken +ipv4_rt_add() +{ + local rc + + echo + echo "IPv4 route add / append tests" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2" + log_test $? 2 "Attempt to add duplicate route - gw" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 dev veth3" + log_test $? 2 "Attempt to add duplicate route - dev only" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + log_test $? 2 "Attempt to add duplicate route - reject route" + + # iproute2 prepend only sets NLM_F_CREATE + # - adds a new route; does NOT convert existing route to ECMP + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1" + log_test $? 0 "Add new nexthop for existing prefix" + + # route append with same prefix adds a new route + # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Append nexthop to existing route - gw" + + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append 172.16.104.0/24 dev veth3" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link" + log_test $? 0 "Append nexthop to existing route - dev only" + + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append unreachable 172.16.104.0/24" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24" + log_test $? 0 "Append nexthop to existing route - reject route" + + run_cmd "$IP ro flush 172.16.104.0/24" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2" + check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Append nexthop to existing reject route - gw" + + run_cmd "$IP ro flush 172.16.104.0/24" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + run_cmd "$IP ro append 172.16.104.0/24 dev veth3" + check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link" + log_test $? 0 "Append nexthop to existing reject route - dev only" + + # insert mpath directly + add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "add multipath route" + + add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2" + log_test $? 2 "Attempt to add duplicate multipath route" + + # insert of a second route without append but different metric + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256" + rc=$? + fi + log_test $rc 0 "Route add with different metrics" + + run_cmd "$IP ro del 172.16.104.0/24 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256" + rc=$? + fi + log_test $rc 0 "Route delete with metric" +} + +ipv4_rt_replace_single() +{ + # single path with single path + # + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Single path with single path" + + # single path with multipath + # + add_initial_route "nexthop via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Single path with multipath" + + # single path with reject + # + add_initial_route "nexthop via 172.16.101.2" + run_cmd "$IP ro replace unreachable 172.16.104.0/24" + check_route "unreachable 172.16.104.0/24" + log_test $? 0 "Single path with reject route" + + # single path with single path using MULTIPATH attribute + # + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Single path with single path via multipath attribute" + + # route replace fails - invalid nexthop + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2" + if [ $? -eq 0 ]; then + # previous command is expected to fail so if it returns 0 + # that means the test failed. + log_test 0 1 "Invalid nexthop" else - fib_unreg_test - fib_down_test - fib_carrier_test - fib_nexthop_test + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1" + log_test $? 0 "Invalid nexthop" + fi + + # replace non-existent route + # - note use of change versus replace since ip adds NLM_F_CREATE + # for replace + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2" + log_test $? 2 "Single path - replace of non-existent route" +} + +ipv4_rt_replace_mpath() +{ + # multipath with multipath + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1" + log_test $? 0 "Multipath with multipath" + + # multipath with single + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3" + check_route "172.16.104.0/24 via 172.16.101.3 dev veth1" + log_test $? 0 "Multipath with single path" + + # multipath with single + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3" + check_route "172.16.104.0/24 via 172.16.101.3 dev veth1" + log_test $? 0 "Multipath with single path via multipath attribute" + + # multipath with reject + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace unreachable 172.16.104.0/24" + check_route "unreachable 172.16.104.0/24" + log_test $? 0 "Multipath with reject route" + + # route replace fails - invalid nexthop 1 + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid first nexthop" + + # route replace fails - invalid nexthop 2 + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid second nexthop" + + # multipath non-existent route + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3" + log_test $? 2 "Multipath - replace of non-existent route" +} + +ipv4_rt_replace() +{ + echo + echo "IPv4 route replace tests" + + ipv4_rt_replace_single + ipv4_rt_replace_mpath +} + +ipv4_route_test() +{ + route_setup + + ipv4_rt_add + ipv4_rt_replace + + route_cleanup +} + +ipv4_addr_metric_test() +{ + local rc + + echo + echo "IPv4 prefix route tests" + + ip_addr_metric_check || return 1 + + setup + + set -e + $IP li add dummy1 type dummy + $IP li add dummy2 type dummy + $IP li set dummy1 up + $IP li set dummy2 up + + # default entry is metric 256 + run_cmd "$IP addr add dev dummy1 172.16.104.1/24" + run_cmd "$IP addr add dev dummy2 172.16.104.2/24" + set +e + + check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2" + log_test $? 0 "Default metric" + + set -e + run_cmd "$IP addr flush dev dummy1" + run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257" + set +e + + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257" + log_test $? 0 "User specified metric on first device" + + set -e + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258" + set +e + + check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258" + log_test $? 0 "User specified metric on second device" + + run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258" + rc=$? + fi + log_test $rc 0 "Delete of address on first device" + + run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259" + rc=$? + fi + log_test $rc 0 "Modify metric of address" + + # verify prefix route removed on down + run_cmd "$IP li set dev dummy2 down" + rc=$? + if [ $rc -eq 0 ]; then + check_route "" + rc=$? + fi + log_test $rc 0 "Prefix route removed on link down" + + # verify prefix route re-inserted with assigned metric + run_cmd "$IP li set dev dummy2 up" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259" + rc=$? fi + log_test $rc 0 "Prefix route with metric on link up" + + $IP li del dummy1 + $IP li del dummy2 + cleanup +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v verbose mode (show commands and output) +EOF } +################################################################################ +# main + +while getopts :t:pPhv o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +PEER_CMD="ip netns exec ${PEER_NS}" + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip; fi if [ ! -x "$(command -v ip)" ]; then echo "SKIP: Could not run test without ip tool" - exit 0 + exit $ksft_skip fi ip route help 2>&1 | grep -q fibmatch if [ $? -ne 0 ]; then echo "SKIP: iproute2 too old, missing fibmatch" - exit 0 + exit $ksft_skip fi # start clean cleanup &> /dev/null -fib_test +for t in $TESTS +do + case $t in + fib_unreg_test|unregister) fib_unreg_test;; + fib_down_test|down) fib_down_test;; + fib_carrier_test|carrier) fib_carrier_test;; + fib_nexthop_test|nexthop) fib_nexthop_test;; + ipv6_route_test|ipv6_rt) ipv6_route_test;; + ipv4_route_test|ipv4_rt) ipv4_route_test;; + ipv6_addr_metric) ipv6_addr_metric_test;; + ipv4_addr_metric) ipv4_addr_metric_test;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi exit $ret diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 75d922438bc9..d8313d0438b7 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -75,14 +76,31 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +learning() +{ + learning_test "br0" $swp1 $h1 $h2 +} + +flooding() +{ + flood_test $swp2 $h1 $h2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 192.0.2.2 -ping6_test $h1 2001:db8:1::2 -learning_test "br0" $swp1 $h1 $h2 -flood_test $swp2 $h1 $h2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1cddf06f691d..c15c6c85c984 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" NUM_NETIFS=4 source lib.sh @@ -73,14 +74,31 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +learning() +{ + learning_test "br0" $swp1 $h1 $h2 +} + +flooding() +{ + flood_test $swp2 $h1 $h2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 192.0.2.2 -ping6_test $h1 2001:db8:1::2 -learning_test "br0" $swp1 $h1 $h2 -flood_test $swp2 $h1 $h2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 1ac6c62271f3..7b18a53aa556 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -321,6 +321,50 @@ simple_if_fini() vrf_destroy $vrf_name } +tunnel_create() +{ + local name=$1; shift + local type=$1; shift + local local=$1; shift + local remote=$1; shift + + ip link add name $name type $type \ + local $local remote $remote "$@" + ip link set dev $name up +} + +tunnel_destroy() +{ + local name=$1; shift + + ip link del dev $name +} + +vlan_create() +{ + local if_name=$1; shift + local vid=$1; shift + local vrf=$1; shift + local ips=("${@}") + local name=$if_name.$vid + + ip link add name $name link $if_name type vlan id $vid + if [ "$vrf" != "" ]; then + ip link set dev $name master $vrf + fi + ip link set dev $name up + __addr_add_del $name add "${ips[@]}" +} + +vlan_destroy() +{ + local if_name=$1; shift + local vid=$1; shift + local name=$if_name.$vid + + ip link del dev $name +} + master_name_get() { local if_name=$1 @@ -335,6 +379,15 @@ link_stats_tx_packets_get() ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' } +tc_rule_stats_get() +{ + local dev=$1; shift + local pref=$1; shift + + tc -j -s filter show dev $dev ingress pref $pref | + jq '.[1].options.actions[].stats.packets' +} + mac_get() { local if_name=$1 @@ -353,19 +406,33 @@ bridge_ageing_time_get() echo $((ageing_time / 100)) } -forwarding_enable() +declare -A SYSCTL_ORIG +sysctl_set() +{ + local key=$1; shift + local value=$1; shift + + SYSCTL_ORIG[$key]=$(sysctl -n $key) + sysctl -qw $key=$value +} + +sysctl_restore() { - ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding) - ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding) + local key=$1; shift - sysctl -q -w net.ipv4.conf.all.forwarding=1 - sysctl -q -w net.ipv6.conf.all.forwarding=1 + sysctl -qw $key=${SYSCTL_ORIG["$key"]} +} + +forwarding_enable() +{ + sysctl_set net.ipv4.conf.all.forwarding 1 + sysctl_set net.ipv6.conf.all.forwarding 1 } forwarding_restore() { - sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd - sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd + sysctl_restore net.ipv6.conf.all.forwarding + sysctl_restore net.ipv4.conf.all.forwarding } tc_offload_check() @@ -381,6 +448,115 @@ tc_offload_check() return 0 } +trap_install() +{ + local dev=$1; shift + local direction=$1; shift + + # For slow-path testing, we need to install a trap to get to + # slow path the packets that would otherwise be switched in HW. + tc filter add dev $dev $direction pref 1 flower skip_sw action trap +} + +trap_uninstall() +{ + local dev=$1; shift + local direction=$1; shift + + tc filter del dev $dev $direction pref 1 flower skip_sw +} + +slow_path_trap_install() +{ + if [ "${tcflags/skip_hw}" != "$tcflags" ]; then + trap_install "$@" + fi +} + +slow_path_trap_uninstall() +{ + if [ "${tcflags/skip_hw}" != "$tcflags" ]; then + trap_uninstall "$@" + fi +} + +__icmp_capture_add_del() +{ + local add_del=$1; shift + local pref=$1; shift + local vsuf=$1; shift + local tundev=$1; shift + local filter=$1; shift + + tc filter $add_del dev "$tundev" ingress \ + proto ip$vsuf pref $pref \ + flower ip_proto icmp$vsuf $filter \ + action pass +} + +icmp_capture_install() +{ + __icmp_capture_add_del add 100 "" "$@" +} + +icmp_capture_uninstall() +{ + __icmp_capture_add_del del 100 "" "$@" +} + +icmp6_capture_install() +{ + __icmp_capture_add_del add 100 v6 "$@" +} + +icmp6_capture_uninstall() +{ + __icmp_capture_add_del del 100 v6 "$@" +} + +__vlan_capture_add_del() +{ + local add_del=$1; shift + local pref=$1; shift + local dev=$1; shift + local filter=$1; shift + + tc filter $add_del dev "$dev" ingress \ + proto 802.1q pref $pref \ + flower $filter \ + action pass +} + +vlan_capture_install() +{ + __vlan_capture_add_del add 100 "$@" +} + +vlan_capture_uninstall() +{ + __vlan_capture_add_del del 100 "$@" +} + +matchall_sink_create() +{ + local dev=$1; shift + + tc qdisc add dev $dev clsact + tc filter add dev $dev ingress \ + pref 10000 \ + matchall \ + action drop +} + +tests_run() +{ + local current_test + + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + ############################################################################## # Tests diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh new file mode 100755 index 000000000000..e6fd7a18c655 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -0,0 +1,159 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the device to mirror to is a +# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated, +# and another gretap / ip6gretap netdevice is then capable of decapsulating the +# traffic. Test that the payload is what is expected (ICMP ping request or +# reply, depending on test). + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_mac + test_ip6gretap_mac + test_two_spans +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_mac() +{ + local tundev=$1; shift + local direction=$1; shift + local prot=$1; shift + local what=$1; shift + + local swp3mac=$(mac_get $swp3) + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + tc filter add dev $h3 ingress pref 77 prot $prot \ + flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ + action pass + + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + + tc filter del dev $h3 ingress pref 77 + mirror_uninstall $swp1 $direction + + log_test "$direction $what: envelope MAC ($tcflags)" +} + +test_two_spans() +{ + RET=0 + + mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 egress gt6 "matchall $tcflags" + quick_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt6 egress + + mirror_uninstall $swp1 ingress + fail_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt6 egress + + mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_uninstall $swp1 egress + quick_test_span_gre_dir gt4 ingress + fail_test_span_gre_dir gt6 egress + + mirror_uninstall $swp1 ingress + log_test "two simultaneously configured mirrors ($tcflags)" +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" +} + +test_ip6gretap() +{ + full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap" + full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" +} + +test_gretap_mac() +{ + test_span_gre_mac gt4 ingress ip "mirror to gretap" + test_span_gre_mac gt4 egress ip "mirror to gretap" +} + +test_ip6gretap_mac() +{ + test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap" + test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh new file mode 100755 index 000000000000..360ca133bead --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -0,0 +1,226 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +---------------------+ +---------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +-----|---------------+ +---------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirror | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | +---------------------------------------------------------------------+ | +# | | OL + gt6 (ip6gretap) + gt4 (gretap) | | +# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | | +# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | | +# | | : ttl=100 : ttl=100 | | +# | | : tos=inherit : tos=inherit | | +# | +-------------------------:--|-------------------:--|-----------------+ | +# | : | : | | +# | +-------------------------:--|-------------------:--|-----------------+ | +# | | UL : |,---------------------' | | +# | | + $swp3 : || : | | +# | | | 192.0.2.129/28 : vv : | | +# | | | 2001:db8:2::1/64 : + ul (dummy) : | | +# | +---|---------------------:----------------------:--------------------+ | +# +-----|---------------------:----------------------:----------------------+ +# | : : +# +-----|---------------------:----------------------:----------------------+ +# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) | +# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 | +# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 | +# | ttl=100 ttl=100 | +# | tos=inherit tos=inherit | +# | | +# +-------------------------------------------------------------------------+ +# +# This tests mirroring to gretap and ip6gretap configured in an overlay / +# underlay manner, i.e. with a bound dummy device that marks underlay VRF where +# the encapsulated packed should be routed. + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64 + + tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129 + ip link set h3-gt4 vrf v$h3 + matchall_sink_create h3-gt4 + + tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1 + ip link set h3-gt6 vrf v$h3 + matchall_sink_create h3-gt6 +} + +h3_destroy() +{ + tunnel_destroy h3-gt6 + tunnel_destroy h3-gt4 + + simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64 +} + +switch_create() +{ + # Bridge between H1 and H2. + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + + # Underlay. + + simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64 + + ip link add name ul type dummy + ip link set dev ul master v$swp3 + ip link set dev ul up + + # Overlay. + + vrf_create vrf-ol + ip link set dev vrf-ol up + + tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \ + ttl 100 tos inherit dev ul + ip link set dev gt4 master vrf-ol + ip link set dev gt4 up + + tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \ + ttl 100 tos inherit dev ul allow-localremote + ip link set dev gt6 master vrf-ol + ip link set dev gt6 up +} + +switch_destroy() +{ + vrf_destroy vrf-ol + + tunnel_destroy gt6 + tunnel_destroy gt4 + + simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64 + + ip link del dev ul + + tc qdisc del dev $swp1 clsact + + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL" +} + +test_ip6gretap() +{ + full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL" + full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL" +} + +test_all() +{ + RET=0 + + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh new file mode 100755 index 000000000000..3bb4c2ba7b14 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the underlay route points at a +# bridge device without vlan filtering (802.1d). The device attached to that +# bridge is a VLAN. + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_stp + test_ip6gretap_stp +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + vlan_create $swp3 555 + + ip link set dev $swp3.555 master br2 + ip route add 192.0.2.130/32 dev br2 + ip -6 route add 2001:db8:2::2/128 dev br2 + + ip address add dev br2 192.0.2.129/32 + ip address add dev br2 2001:db8:2::1/128 + + vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + vlan_destroy $h3 555 + ip link del dev br2 + vlan_destroy $swp3 555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_vlan_match() +{ + local tundev=$1; shift + local vlan_match=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what" + full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what" +} + +test_gretap() +{ + test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap" +} + +test_ip6gretap() +{ + test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap" +} + +test_gretap_stp() +{ + full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap" +} + +test_ip6gretap_stp() +{ + full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh new file mode 100755 index 000000000000..aa29d46186a8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -0,0 +1,278 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test how mirrors to gretap and ip6gretap react to changes to relevant +# configuration. + +ALL_TESTS=" + test_ttl + test_tun_up + test_egress_up + test_remote_ip + test_tun_del + test_route_del +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + # This test downs $swp3, which deletes the configured IPv6 address + # unless this sysctl is set. + sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1 + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_ttl() +{ + local tundev=$1; shift + local type=$1; shift + local prot=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + tc filter add dev $h3 ingress pref 77 prot $prot \ + flower ip_ttl 50 action pass + + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0 + + ip link set dev $tundev type $type ttl 50 + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + + ip link set dev $tundev type $type ttl 100 + tc filter del dev $h3 ingress pref 77 + mirror_uninstall $swp1 ingress + + log_test "$what: TTL change ($tcflags)" +} + +test_span_gre_tun_up() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev down + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + ip link set dev $tundev up + + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: tunnel down/up ($tcflags)" +} + +test_span_gre_egress_up() +{ + local tundev=$1; shift + local remote_ip=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $swp3 down + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + # After setting the device up, wait for neighbor to get resolved so that + # we can expect mirroring to work. + ip link set dev $swp3 up + while true; do + ip neigh sh dev $swp3 $remote_ip nud reachable | + grep -q ^ + if [[ $? -ne 0 ]]; then + sleep 1 + else + break + fi + done + + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: egress down/up ($tcflags)" +} + +test_span_gre_remote_ip() +{ + local tundev=$1; shift + local type=$1; shift + local correct_ip=$1; shift + local wrong_ip=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev type $type remote $wrong_ip + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + ip link set dev $tundev type $type remote $correct_ip + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: remote address change ($tcflags)" +} + +test_span_gre_tun_del() +{ + local tundev=$1; shift + local type=$1; shift + local flags=$1; shift + local local_ip=$1; shift + local remote_ip=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + ip link del dev $tundev + fail_test_span_gre_dir $tundev ingress + + tunnel_create $tundev $type $local_ip $remote_ip \ + ttl 100 tos inherit $flags + + # Recreating the tunnel doesn't reestablish mirroring, so reinstall it + # and verify it works for the follow-up tests. + mirror_uninstall $swp1 ingress + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: tunnel deleted ($tcflags)" +} + +test_span_gre_route_del() +{ + local tundev=$1; shift + local edev=$1; shift + local route=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + ip route del $route dev $edev + fail_test_span_gre_dir $tundev ingress + + ip route add $route dev $edev + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: underlay route removal ($tcflags)" +} + +test_ttl() +{ + test_span_gre_ttl gt4 gretap ip "mirror to gretap" + test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap" +} + +test_tun_up() +{ + test_span_gre_tun_up gt4 "mirror to gretap" + test_span_gre_tun_up gt6 "mirror to ip6gretap" +} + +test_egress_up() +{ + test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap" + test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap" +} + +test_remote_ip() +{ + test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap" + test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap" +} + +test_tun_del() +{ + test_span_gre_tun_del gt4 gretap "" \ + 192.0.2.129 192.0.2.130 "mirror to gretap" + test_span_gre_tun_del gt6 ip6gretap allow-localremote \ + 2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap" +} + +test_route_del() +{ + test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap" + test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh new file mode 100755 index 000000000000..12914f40612d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -0,0 +1,137 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The +# interfaces on H1 and H2 have two addresses each. Flower match on one of the +# addresses is configured with mirror action. It is expected that when pinging +# this address, mirroring takes place, whereas when pinging the other one, +# there's no mirroring. + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 + + ip address add dev $h1 192.0.2.3/28 + ip address add dev $h2 192.0.2.4/28 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h2 192.0.2.4/28 + ip address del dev $h1 192.0.2.3/28 + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_dir_acl() +{ + test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 +} + +fail_test_span_gre_dir_acl() +{ + fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 +} + +full_test_span_gre_dir_acl() +{ + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local match_dip=$1; shift + local what=$1; shift + + mirror_install $swp1 $direction $tundev \ + "protocol ip flower $tcflags dst_ip $match_dip" + fail_test_span_gre_dir $tundev $direction + test_span_gre_dir_acl "$tundev" "$direction" \ + "$forward_type" "$backward_type" + mirror_uninstall $swp1 $direction + + # Test lack of mirroring after ACL mirror is uninstalled. + fail_test_span_gre_dir_acl "$tundev" "$direction" + + log_test "$direction $what ($tcflags)" +} + +test_gretap() +{ + full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap" + full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap" +} + +test_ip6gretap() +{ + full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap" + full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap" +} + +test_all() +{ + RET=0 + + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh new file mode 100644 index 000000000000..619b469365be --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: GPL-2.0 + +source mirror_lib.sh + +quick_test_span_gre_dir_ips() +{ + local tundev=$1; shift + + do_test_span_dir_ips 10 h3-$tundev "$@" +} + +fail_test_span_gre_dir_ips() +{ + local tundev=$1; shift + + do_test_span_dir_ips 0 h3-$tundev "$@" +} + +test_span_gre_dir_ips() +{ + local tundev=$1; shift + + test_span_dir_ips h3-$tundev "$@" +} + +full_test_span_gre_dir_ips() +{ + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + "$backward_type" "$ip1" "$ip2" + mirror_uninstall $swp1 $direction + + log_test "$direction $what ($tcflags)" +} + +full_test_span_gre_dir_vlan_ips() +{ + local tundev=$1; shift + local direction=$1; shift + local vlan_match=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + + test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + "$backward_type" "$ip1" "$ip2" + + tc filter add dev $h3 ingress pref 77 prot 802.1q \ + flower $vlan_match ip_proto 0x2f \ + action pass + mirror_test v$h1 $ip1 $ip2 $h3 77 10 + tc filter del dev $h3 ingress pref 77 + + mirror_uninstall $swp1 $direction + + log_test "$direction $what ($tcflags)" +} + +quick_test_span_gre_dir() +{ + quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +fail_test_span_gre_dir() +{ + fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +test_span_gre_dir() +{ + test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_dir() +{ + full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_dir_vlan() +{ + full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_stp_ips() +{ + local tundev=$1; shift + local nbpdev=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + bridge link set dev $nbpdev state disabled + sleep 1 + fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + bridge link set dev $nbpdev state forwarding + sleep 1 + quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + mirror_uninstall $swp1 ingress + + log_test "$what: STP state ($tcflags)" +} + +full_test_span_gre_stp() +{ + full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh new file mode 100755 index 000000000000..fc0508e40fca --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for +# the tunnel remote address has invalid address at the time that the mirroring +# is set up. Later on, the neighbor is deleted and it is expected to be +# reinitialized using the usual ARP process, and the mirroring offload updated. + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_neigh() +{ + local addr=$1; shift + local tundev=$1; shift + local direction=$1; shift + local what=$1; shift + + RET=0 + + ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55 + mirror_install $swp1 $direction $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + ip neigh del dev $swp3 $addr + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 $direction + + log_test "$direction $what: neighbor change ($tcflags)" +} + +test_gretap() +{ + test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap" +} + +test_ip6gretap() +{ + test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap" + test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh new file mode 100755 index 000000000000..8fa681eb90e7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint +# is reachable through a next-hop route (as opposed to directly-attached route). + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.$h3.rp_filter 0 + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.161/28 + ip address add dev $h3 192.0.2.162/28 + ip address add dev gt4 192.0.2.129/32 + ip address add dev h3-gt4 192.0.2.130/32 + + # IPv6 route can't be added after address. Such routes are rejected due + # to the gateway address having been configured on the local system. It + # works the other way around though. + ip address add dev $swp3 2001:db8:4::1/64 + ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2 + ip address add dev $h3 2001:db8:4::2/64 + ip address add dev gt6 2001:db8:2::1 + ip address add dev h3-gt6 2001:db8:2::2 +} + +cleanup() +{ + pre_cleanup + + ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2 + ip address del dev $h3 2001:db8:4::2/64 + ip address del dev $swp3 2001:db8:4::1/64 + + ip address del dev $h3 192.0.2.162/28 + ip address del dev $swp3 192.0.2.161/28 + + mirror_gre_topo_destroy + vrf_cleanup + + sysctl_restore net.ipv4.conf.$h3.rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + +test_gretap() +{ + RET=0 + mirror_install $swp1 ingress gt4 "matchall $tcflags" + + # For IPv4, test that there's no mirroring without the route directing + # the traffic to tunnel remote address. Then add it and test that + # mirroring starts. For IPv6 we can't test this due to the limitation + # that routes for locally-specified IPv6 addresses can't be added. + fail_test_span_gre_dir gt4 ingress + + ip route add 192.0.2.130/32 via 192.0.2.162 + quick_test_span_gre_dir gt4 ingress + ip route del 192.0.2.130/32 via 192.0.2.162 + + mirror_uninstall $swp1 ingress + log_test "mirror to gre with next-hop remote ($tcflags)" +} + +test_ip6gretap() +{ + RET=0 + + mirror_install $swp1 ingress gt6 "matchall $tcflags" + quick_test_span_gre_dir gt6 ingress + mirror_uninstall $swp1 ingress + + log_test "mirror to ip6gre with next-hop remote ($tcflags)" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh new file mode 100644 index 000000000000..253419564708 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is the standard topology for testing mirroring to gretap and ip6gretap +# netdevices. The tests that use it tweak it in one way or another--importantly, +# $swp3 and $h3 need to have addresses set up. +# +# +---------------------+ +---------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +-----|---------------+ +---------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirror | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | + $swp3 + gt6 (ip6gretap) + gt4 (gretap) | +# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | +# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | +# | | : ttl=100 : ttl=100 | +# | | : tos=inherit : tos=inherit | +# | | : : | +# +-----|---------------------:----------------------:----------------------+ +# | : : +# +-----|---------------------:----------------------:----------------------+ +# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) | +# | loc=2001:db8:2::2 loc=192.0.2.130 | +# | rem=2001:db8:2::1 rem=192.0.2.129 | +# | ttl=100 ttl=100 | +# | tos=inherit tos=inherit | +# | | +# +-------------------------------------------------------------------------+ + +source mirror_topo_lib.sh + +mirror_gre_topo_h3_create() +{ + mirror_topo_h3_create + + tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129 + ip link set h3-gt4 vrf v$h3 + matchall_sink_create h3-gt4 + + tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1 + ip link set h3-gt6 vrf v$h3 + matchall_sink_create h3-gt6 +} + +mirror_gre_topo_h3_destroy() +{ + tunnel_destroy h3-gt6 + tunnel_destroy h3-gt4 + + mirror_topo_h3_destroy +} + +mirror_gre_topo_switch_create() +{ + mirror_topo_switch_create + + tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \ + ttl 100 tos inherit + + tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \ + ttl 100 tos inherit allow-localremote +} + +mirror_gre_topo_switch_destroy() +{ + tunnel_destroy gt6 + tunnel_destroy gt4 + + mirror_topo_switch_destroy +} + +mirror_gre_topo_create() +{ + mirror_topo_h1_create + mirror_topo_h2_create + mirror_gre_topo_h3_create + + mirror_gre_topo_switch_create +} + +mirror_gre_topo_destroy() +{ + mirror_gre_topo_switch_destroy + + mirror_gre_topo_h3_destroy + mirror_topo_h2_destroy + mirror_topo_h1_destroy +} diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh new file mode 100755 index 000000000000..88cecdb9a861 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice +# whose underlay route points at a vlan device. + +ALL_TESTS=" + test_gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip link add name $swp3.555 link $swp3 type vlan id 555 + ip address add dev $swp3.555 192.0.2.129/32 + ip address add dev $swp3.555 2001:db8:2::1/128 + ip link set dev $swp3.555 up + + ip route add 192.0.2.130/32 dev $swp3.555 + ip -6 route add 2001:db8:2::2/128 dev $swp3.555 + + ip link add name $h3.555 link $h3 type vlan id 555 + ip link set dev $h3.555 master v$h3 + ip address add dev $h3.555 192.0.2.130/28 + ip address add dev $h3.555 2001:db8:2::2/64 + ip link set dev $h3.555 up +} + +cleanup() +{ + pre_cleanup + + ip link del dev $h3.555 + ip link del dev $swp3.555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh new file mode 100755 index 000000000000..5dbc7a08f4bd --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -0,0 +1,270 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the underlay route points at a +# vlan device on top of a bridge device with vlan filtering (802.1q). + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_forbidden_cpu + test_ip6gretap_forbidden_cpu + test_gretap_forbidden_egress + test_ip6gretap_forbidden_egress + test_gretap_untagged_egress + test_ip6gretap_untagged_egress + test_gretap_fdb_roaming + test_ip6gretap_fdb_roaming + test_gretap_stp + test_ip6gretap_stp +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128 + bridge vlan add dev br1 vid 555 self + ip route rep 192.0.2.130/32 dev br1.555 + ip -6 route rep 2001:db8:2::2/128 dev br1.555 + + vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64 + + ip link set dev $swp3 master br1 + bridge vlan add dev $swp3 vid 555 + bridge vlan add dev $swp2 vid 555 +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 nomaster + ip link set dev $swp3 nomaster + vlan_destroy $h3 555 + vlan_destroy br1 555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_vlan_match() +{ + local tundev=$1; shift + local vlan_match=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what" + full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what" +} + +test_gretap() +{ + test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap" +} + +test_ip6gretap() +{ + test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap" +} + +test_span_gre_forbidden_cpu() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + # Run the pass-test first, to prime neighbor table. + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + # Now forbid the VLAN at the bridge and see it fail. + bridge vlan del dev br1 vid 555 self + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge vlan add dev br1 vid 555 self + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan forbidden at a bridge ($tcflags)" +} + +test_gretap_forbidden_cpu() +{ + test_span_gre_forbidden_cpu gt4 "mirror to gretap" +} + +test_ip6gretap_forbidden_cpu() +{ + test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap" +} + +test_span_gre_forbidden_egress() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + bridge vlan del dev $swp3 vid 555 + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge vlan add dev $swp3 vid 555 + # Re-prime FDB + arping -I br1.555 192.0.2.130 -fqc 1 + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan forbidden at a bridge egress ($tcflags)" +} + +test_gretap_forbidden_egress() +{ + test_span_gre_forbidden_egress gt4 "mirror to gretap" +} + +test_ip6gretap_forbidden_egress() +{ + test_span_gre_forbidden_egress gt6 "mirror to ip6gretap" +} + +test_span_gre_untagged_egress() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + + quick_test_span_gre_dir $tundev ingress + quick_test_span_vlan_dir $h3 555 ingress + + bridge vlan add dev $swp3 vid 555 pvid untagged + sleep 1 + quick_test_span_gre_dir $tundev ingress + fail_test_span_vlan_dir $h3 555 ingress + + bridge vlan add dev $swp3 vid 555 + sleep 1 + quick_test_span_gre_dir $tundev ingress + quick_test_span_vlan_dir $h3 555 ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan untagged at a bridge egress ($tcflags)" +} + +test_gretap_untagged_egress() +{ + test_span_gre_untagged_egress gt4 "mirror to gretap" +} + +test_ip6gretap_untagged_egress() +{ + test_span_gre_untagged_egress gt6 "mirror to ip6gretap" +} + +test_span_gre_fdb_roaming() +{ + local tundev=$1; shift + local what=$1; shift + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + bridge fdb del dev $swp3 $h3mac vlan 555 master + bridge fdb add dev $swp2 $h3mac vlan 555 master + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge fdb del dev $swp2 $h3mac vlan 555 master + # Re-prime FDB + arping -I br1.555 192.0.2.130 -fqc 1 + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: MAC roaming ($tcflags)" +} + +test_gretap_fdb_roaming() +{ + test_span_gre_fdb_roaming gt4 "mirror to gretap" +} + +test_ip6gretap_fdb_roaming() +{ + test_span_gre_fdb_roaming gt6 "mirror to ip6gretap" +} + +test_gretap_stp() +{ + full_test_span_gre_stp gt4 $swp3 "mirror to gretap" +} + +test_ip6gretap_stp() +{ + full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh new file mode 100644 index 000000000000..d36dc26c6c51 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: GPL-2.0 + +mirror_install() +{ + local from_dev=$1; shift + local direction=$1; shift + local to_dev=$1; shift + local filter=$1; shift + + tc filter add dev $from_dev $direction \ + pref 1000 $filter \ + action mirred egress mirror dev $to_dev +} + +mirror_uninstall() +{ + local from_dev=$1; shift + local direction=$1; shift + + tc filter del dev $swp1 $direction pref 1000 +} + +mirror_test() +{ + local vrf_name=$1; shift + local sip=$1; shift + local dip=$1; shift + local dev=$1; shift + local pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $dev $pref) + ip vrf exec $vrf_name \ + ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null + local t1=$(tc_rule_stats_get $dev $pref) + local delta=$((t1 - t0)) + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} + +do_test_span_dir_ips() +{ + local expect=$1; shift + local dev=$1; shift + local direction=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + icmp_capture_install $dev + mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + icmp_capture_uninstall $dev +} + +quick_test_span_dir_ips() +{ + do_test_span_dir_ips 10 "$@" +} + +fail_test_span_dir_ips() +{ + do_test_span_dir_ips 0 "$@" +} + +test_span_dir_ips() +{ + local dev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2" + + icmp_capture_install $dev "type $forward_type" + mirror_test v$h1 $ip1 $ip2 $dev 100 10 + icmp_capture_uninstall $dev + + icmp_capture_install $dev "type $backward_type" + mirror_test v$h2 $ip2 $ip1 $dev 100 10 + icmp_capture_uninstall $dev +} + +fail_test_span_dir() +{ + fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +test_span_dir() +{ + test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +do_test_span_vlan_dir_ips() +{ + local expect=$1; shift + local dev=$1; shift + local vid=$1; shift + local direction=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + # Install the capture as skip_hw to avoid double-counting of packets. + # The traffic is meant for local box anyway, so will be trapped to + # kernel. + vlan_capture_install $dev "skip_hw vlan_id $vid" + mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + vlan_capture_uninstall $dev +} + +quick_test_span_vlan_dir_ips() +{ + do_test_span_vlan_dir_ips 10 "$@" +} + +fail_test_span_vlan_dir_ips() +{ + do_test_span_vlan_dir_ips 0 "$@" +} + +quick_test_span_vlan_dir() +{ + quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +fail_test_span_vlan_dir() +{ + fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh new file mode 100644 index 000000000000..04979e5962e7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is the standard topology for testing mirroring. The tests that use it +# tweak it in one way or another--typically add more devices to the topology. +# +# +---------------------+ +---------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +-----|---------------+ +---------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirror | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | + $swp3 | +# +-----|-------------------------------------------------------------------+ +# | +# +-----|-------------------------------------------------------------------+ +# | H3 + $h3 | +# | | +# +-------------------------------------------------------------------------+ + +mirror_topo_h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +mirror_topo_h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +mirror_topo_h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +mirror_topo_h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +mirror_topo_h3_create() +{ + simple_if_init $h3 + tc qdisc add dev $h3 clsact +} + +mirror_topo_h3_destroy() +{ + tc qdisc del dev $h3 clsact + simple_if_fini $h3 +} + +mirror_topo_switch_create() +{ + ip link set dev $swp3 up + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact +} + +mirror_topo_switch_destroy() +{ + tc qdisc del dev $swp1 clsact + + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br1 + + ip link set dev $swp3 down +} + +mirror_topo_create() +{ + mirror_topo_h1_create + mirror_topo_h2_create + mirror_topo_h3_create + + mirror_topo_switch_create +} + +mirror_topo_destroy() +{ + mirror_topo_switch_destroy + + mirror_topo_h3_destroy + mirror_topo_h2_destroy + mirror_topo_h1_destroy +} diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh new file mode 100755 index 000000000000..9ab2ce77b332 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh +# for more details. +# +# Test for "tc action mirred egress mirror" that mirrors to a vlan device. + +ALL_TESTS=" + test_vlan + test_tagged_vlan +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_topo_create + + vlan_create $swp3 555 + + vlan_create $h3 555 v$h3 + matchall_sink_create $h3.555 + + vlan_create $h1 111 v$h1 192.0.2.17/28 + bridge vlan add dev $swp1 vid 111 + + vlan_create $h2 111 v$h2 192.0.2.18/28 + bridge vlan add dev $swp2 vid 111 +} + +cleanup() +{ + pre_cleanup + + vlan_destroy $h2 111 + vlan_destroy $h1 111 + vlan_destroy $h3 555 + vlan_destroy $swp3 555 + + mirror_topo_destroy + vrf_cleanup +} + +test_vlan_dir() +{ + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + RET=0 + + mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" + test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type" + mirror_uninstall $swp1 $direction + + log_test "$direction mirror to vlan ($tcflags)" +} + +test_vlan() +{ + test_vlan_dir ingress 8 0 + test_vlan_dir egress 0 8 +} + +test_tagged_vlan_dir() +{ + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + RET=0 + + mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" + do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \ + 192.0.2.17 192.0.2.18 + do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \ + 192.0.2.17 192.0.2.18 + mirror_uninstall $swp1 $direction + + log_test "$direction mirror tagged to vlan ($tcflags)" +} + +test_tagged_vlan() +{ + test_tagged_vlan_dir ingress 8 0 + test_tagged_vlan_dir egress 0 8 +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + trap_install $h3 ingress + + tests_run + + trap_uninstall $h3 ingress + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index cc6a14abfa87..a75cb51cc5bd 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6" NUM_NETIFS=4 source lib.sh @@ -114,12 +115,21 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 198.51.100.2 -ping6_test $h1 2001:db8:2::2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 3bc351008db6..8b6d0fb6d604 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test" NUM_NETIFS=8 source lib.sh @@ -191,7 +192,7 @@ multipath_eval() diff=$(echo $weights_ratio - $packets_ratio | bc -l) diff=${diff#-} - test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0 + test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0 check_err $? "Too large discrepancy between expected and measured ratios" log_test "$desc" log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" @@ -204,13 +205,11 @@ multipath4_test() local weight_rp13=$3 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 - local hash_policy # Transmit multiple flows from h1 to h2 and make sure they are # distributed between both multipath links (rp12 and rp13) # according to the configured weights. - hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) - sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + sysctl_set net.ipv4.fib_multipath_hash_policy 1 ip route replace 198.51.100.0/24 vrf vrf-r1 \ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13 @@ -232,7 +231,7 @@ multipath4_test() ip route replace 198.51.100.0/24 vrf vrf-r1 \ nexthop via 169.254.2.22 dev $rp12 \ nexthop via 169.254.3.23 dev $rp13 - sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy + sysctl_restore net.ipv4.fib_multipath_hash_policy } multipath6_l4_test() @@ -242,13 +241,11 @@ multipath6_l4_test() local weight_rp13=$3 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 - local hash_policy # Transmit multiple flows from h1 to h2 and make sure they are # distributed between both multipath links (rp12 and rp13) # according to the configured weights. - hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy) - sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + sysctl_set net.ipv6.fib_multipath_hash_policy 1 ip route replace 2001:db8:2::/64 vrf vrf-r1 \ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ @@ -271,7 +268,7 @@ multipath6_l4_test() nexthop via fe80:2::22 dev $rp12 \ nexthop via fe80:3::23 dev $rp13 - sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy + sysctl_restore net.ipv6.fib_multipath_hash_policy } multipath6_test() @@ -364,13 +361,21 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 198.51.100.2 -ping6_test $h1 2001:db8:2::2 -multipath_test +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 3a6385ebd5d0..813d02d1939d 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ + mirred_egress_mirror_test gact_trap_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -111,6 +113,10 @@ gact_trap_test() { RET=0 + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ skip_hw dst_ip 192.0.2.2 action drop tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \ @@ -179,24 +185,29 @@ cleanup() ip link set $swp1 address $swp1origmac } +mirred_egress_redirect_test() +{ + mirred_egress_test "redirect" +} + +mirred_egress_mirror_test() +{ + mirred_egress_test "mirror" +} + trap cleanup EXIT setup_prepare setup_wait -gact_drop_and_ok_test -mirred_egress_test "redirect" -mirred_egress_test "mirror" +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - gact_drop_and_ok_test - mirred_egress_test "redirect" - mirred_egress_test "mirror" - gact_trap_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh index 2fd15226974b..d2c783e94df3 100755 --- a/tools/testing/selftests/net/forwarding/tc_chains.sh +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="unreachable_chain_test gact_goto_chain_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -107,16 +108,14 @@ trap cleanup EXIT setup_prepare setup_wait -unreachable_chain_test -gact_goto_chain_test +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - unreachable_chain_test - gact_goto_chain_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 032b882adfc0..20d1077e5a3d 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ + match_src_ip_test match_ip_flags_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -149,6 +151,74 @@ match_src_ip_test() log_test "src_ip match ($tcflags)" } +match_ip_flags_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags ip_flags frag action continue + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags ip_flags firstfrag action continue + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags ip_flags nofirstfrag action continue + tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \ + $tcflags ip_flags nofrag action drop + + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=0" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on wrong frag filter (nofrag)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on wrong firstfrag filter (nofrag)" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on nofirstfrag filter (nofrag) " + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Did not match on nofrag filter (nofrag)" + + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=0,mf" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on frag filter (1stfrag)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match fistfrag filter (1stfrag)" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Matched on wrong nofirstfrag filter (1stfrag)" + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Match on wrong nofrag filter (1stfrag)" + + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=256,mf" -q + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=256" -q + + tc_check_packets "dev $h2 ingress" 101 3 + check_err $? "Did not match on frag filter (no1stfrag)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Matched on wrong firstfrag filter (no1stfrag)" + + tc_check_packets "dev $h2 ingress" 103 3 + check_err $? "Did not match on nofirstfrag filter (no1stfrag)" + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Matched on nofrag filter (no1stfrag)" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower + + log_test "ip_flags match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} @@ -177,20 +247,14 @@ trap cleanup EXIT setup_prepare setup_wait -match_dst_mac_test -match_src_mac_test -match_dst_ip_test -match_src_ip_test +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - match_dst_mac_test - match_src_mac_test - match_dst_ip_test - match_src_ip_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh index 077b98048ef4..b5b917203815 100755 --- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh +++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="shared_block_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -109,14 +110,14 @@ trap cleanup EXIT setup_prepare setup_wait -shared_block_test +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - shared_block_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index d571d213418d..c43c6debda06 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -21,6 +21,14 @@ readonly DADDR6='fd::2' readonly path_sysctl_mem="net.core.optmem_max" +# No arguments: automated test +if [[ "$#" -eq "0" ]]; then + $0 4 tcp -t 1 + $0 6 tcp -t 1 + echo "OK. All tests passed" + exit 0 +fi + # Argument parsing if [[ "$#" -lt "2" ]]; then echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh index 903679e0ff31..e3afcb424710 100755 --- a/tools/testing/selftests/net/netdevice.sh +++ b/tools/testing/selftests/net/netdevice.sh @@ -8,6 +8,9 @@ # if not they probably have failed earlier in the boot process and their logged error will be catched by another test # +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # this function will try to up the interface # if already up, nothing done # arg1: network interface name @@ -18,7 +21,7 @@ kci_net_start() ip link show "$netdev" |grep -q UP if [ $? -eq 0 ];then echo "SKIP: $netdev: interface already up" - return 0 + return $ksft_skip fi ip link set "$netdev" up @@ -61,12 +64,12 @@ kci_net_setup() ip address show "$netdev" |grep '^[[:space:]]*inet' if [ $? -eq 0 ];then echo "SKIP: $netdev: already have an IP" - return 0 + return $ksft_skip fi # TODO what ipaddr to set ? DHCP ? echo "SKIP: $netdev: set IP address" - return 0 + return $ksft_skip } # test an ethtool command @@ -84,6 +87,7 @@ kci_netdev_ethtool_test() if [ $ret -ne 0 ];then if [ $ret -eq "$1" ];then echo "SKIP: $netdev: ethtool $2 not supported" + return $ksft_skip else echo "FAIL: $netdev: ethtool $2" return 1 @@ -104,7 +108,7 @@ kci_netdev_ethtool() ethtool --version 2>/dev/null >/dev/null if [ $? -ne 0 ];then echo "SKIP: ethtool not present" - return 1 + return $ksft_skip fi TMP_ETHTOOL_FEATURES="$(mktemp)" @@ -176,13 +180,13 @@ kci_test_netdev() #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip fi ip link show 2>/dev/null >/dev/null if [ $? -ne 0 ];then echo "SKIP: Could not run test without the ip tool" - exit 0 + exit $ksft_skip fi TMP_LIST_NETDEV="$(mktemp)" diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 1e428781a625..f8cc38afffa2 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -43,6 +43,9 @@ # that MTU is properly calculated instead when MTU is not configured from # userspace +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + tests=" pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions @@ -162,7 +165,7 @@ setup_xfrm6() { } setup() { - [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return 1 + [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip cleanup_done=0 for arg do @@ -368,7 +371,7 @@ test_pmtu_vti6_link_add_mtu() { fail=0 - min=1280 + min=68 # vti6 can carry IPv4 packets too max=$((65535 - 40)) # Check invalid values first for v in $((min - 1)) $((max + 1)); do @@ -384,7 +387,7 @@ test_pmtu_vti6_link_add_mtu() { done # Now check valid values - for v in 1280 1300 $((65535 - 40)); do + for v in 68 1280 1300 $((65535 - 40)); do ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 mtu="$(link_get_mtu "${ns_a}" vti6_a)" ${ns_a} ip link del vti6_a diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c new file mode 100644 index 000000000000..7d15e10a9fb6 --- /dev/null +++ b/tools/testing/selftests/net/psock_snd.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/virtio_net.h> +#include <net/if.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "psock_lib.h" + +static bool cfg_use_bind; +static bool cfg_use_csum_off; +static bool cfg_use_csum_off_bad; +static bool cfg_use_dgram; +static bool cfg_use_gso; +static bool cfg_use_qdisc_bypass; +static bool cfg_use_vlan; +static bool cfg_use_vnet; + +static char *cfg_ifname = "lo"; +static int cfg_mtu = 1500; +static int cfg_payload_len = DATA_LEN; +static int cfg_truncate_len = INT_MAX; +static uint16_t cfg_port = 8000; + +/* test sending up to max mtu + 1 */ +#define TEST_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1) + +static char tbuf[TEST_SZ], rbuf[TEST_SZ]; + +static unsigned long add_csum_hword(const uint16_t *start, int num_u16) +{ + unsigned long sum = 0; + int i; + + for (i = 0; i < num_u16; i++) + sum += start[i]; + + return sum; +} + +static uint16_t build_ip_csum(const uint16_t *start, int num_u16, + unsigned long sum) +{ + sum += add_csum_hword(start, num_u16); + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + + return ~sum; +} + +static int build_vnet_header(void *header) +{ + struct virtio_net_hdr *vh = header; + + vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr); + + if (cfg_use_csum_off) { + vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; + vh->csum_start = ETH_HLEN + sizeof(struct iphdr); + vh->csum_offset = __builtin_offsetof(struct udphdr, check); + + /* position check field exactly one byte beyond end of packet */ + if (cfg_use_csum_off_bad) + vh->csum_start += sizeof(struct udphdr) + cfg_payload_len - + vh->csum_offset - 1; + } + + if (cfg_use_gso) { + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP; + vh->gso_size = cfg_mtu - sizeof(struct iphdr); + } + + return sizeof(*vh); +} + +static int build_eth_header(void *header) +{ + struct ethhdr *eth = header; + + if (cfg_use_vlan) { + uint16_t *tag = header + ETH_HLEN; + + eth->h_proto = htons(ETH_P_8021Q); + tag[1] = htons(ETH_P_IP); + return ETH_HLEN + 4; + } + + eth->h_proto = htons(ETH_P_IP); + return ETH_HLEN; +} + +static int build_ipv4_header(void *header, int payload_len) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len); + iph->id = htons(1337); + iph->protocol = IPPROTO_UDP; + iph->saddr = htonl((172 << 24) | (17 << 16) | 2); + iph->daddr = htonl((172 << 24) | (17 << 16) | 1); + iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); + + return iph->ihl << 2; +} + +static int build_udp_header(void *header, int payload_len) +{ + const int alen = sizeof(uint32_t); + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(9); + udph->dest = htons(cfg_port); + udph->len = htons(len); + + if (cfg_use_csum_off) + udph->check = build_ip_csum(header - (2 * alen), alen, + htons(IPPROTO_UDP) + udph->len); + else + udph->check = 0; + + return sizeof(*udph); +} + +static int build_packet(int payload_len) +{ + int off = 0; + + off += build_vnet_header(tbuf); + off += build_eth_header(tbuf + off); + off += build_ipv4_header(tbuf + off, payload_len); + off += build_udp_header(tbuf + off, payload_len); + + if (off + payload_len > sizeof(tbuf)) + error(1, 0, "payload length exceeds max"); + + memset(tbuf + off, DATA_CHAR, payload_len); + + return off + payload_len; +} + +static void do_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + laddr.sll_protocol = htons(ETH_P_IP); + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind"); +} + +static void do_send(int fd, char *buf, int len) +{ + int ret; + + if (!cfg_use_vnet) { + buf += sizeof(struct virtio_net_hdr); + len -= sizeof(struct virtio_net_hdr); + } + if (cfg_use_dgram) { + buf += ETH_HLEN; + len -= ETH_HLEN; + } + + if (cfg_use_bind) { + ret = write(fd, buf, len); + } else { + struct sockaddr_ll laddr = {0}; + + laddr.sll_protocol = htons(ETH_P_IP); + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr)); + } + + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %u %u", ret, len); + + fprintf(stderr, "tx: %u\n", ret); +} + +static int do_tx(void) +{ + const int one = 1; + int fd, len; + + fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket t"); + + if (cfg_use_bind) + do_bind(fd); + + if (cfg_use_qdisc_bypass && + setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one))) + error(1, errno, "setsockopt qdisc bypass"); + + if (cfg_use_vnet && + setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt vnet"); + + len = build_packet(cfg_payload_len); + + if (cfg_truncate_len < len) + len = cfg_truncate_len; + + do_send(fd, tbuf, len); + + if (close(fd)) + error(1, errno, "close t"); + + return len; +} + +static int setup_rx(void) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct sockaddr_in raddr = {0}; + int fd; + + fd = socket(PF_INET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + raddr.sin_family = AF_INET; + raddr.sin_port = htons(cfg_port); + raddr.sin_addr.s_addr = htonl(INADDR_ANY); + + if (bind(fd, (void *)&raddr, sizeof(raddr))) + error(1, errno, "bind r"); + + return fd; +} + +static void do_rx(int fd, int expected_len, char *expected) +{ + int ret; + + ret = recv(fd, rbuf, sizeof(rbuf), 0); + if (ret == -1) + error(1, errno, "recv"); + if (ret != expected_len) + error(1, 0, "recv: %u != %u", ret, expected_len); + + if (memcmp(rbuf, expected, ret)) + error(1, 0, "recv: data mismatch"); + + fprintf(stderr, "rx: %u\n", ret); +} + +static int setup_sniffer(void) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + int fd; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + pair_udp_setfilter(fd); + do_bind(fd); + + return fd; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) { + switch (c) { + case 'b': + cfg_use_bind = true; + break; + case 'c': + cfg_use_csum_off = true; + break; + case 'C': + cfg_use_csum_off_bad = true; + break; + case 'd': + cfg_use_dgram = true; + break; + case 'g': + cfg_use_gso = true; + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'q': + cfg_use_qdisc_bypass = true; + break; + case 't': + cfg_truncate_len = strtoul(optarg, NULL, 0); + break; + case 'v': + cfg_use_vnet = true; + break; + case 'V': + cfg_use_vlan = true; + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } + + if (cfg_use_vlan && cfg_use_dgram) + error(1, 0, "option vlan (-V) conflicts with dgram (-d)"); + + if (cfg_use_csum_off && !cfg_use_vnet) + error(1, 0, "option csum offload (-c) requires vnet (-v)"); + + if (cfg_use_csum_off_bad && !cfg_use_csum_off) + error(1, 0, "option csum bad (-C) requires csum offload (-c)"); + + if (cfg_use_gso && !cfg_use_csum_off) + error(1, 0, "option gso (-g) requires csum offload (-c)"); +} + +static void run_test(void) +{ + int fdr, fds, total_len; + + fdr = setup_rx(); + fds = setup_sniffer(); + + total_len = do_tx(); + + /* BPF filter accepts only this length, vlan changes MAC */ + if (cfg_payload_len == DATA_LEN && !cfg_use_vlan) + do_rx(fds, total_len - sizeof(struct virtio_net_hdr), + tbuf + sizeof(struct virtio_net_hdr)); + + do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len); + + if (close(fds)) + error(1, errno, "close s"); + if (close(fdr)) + error(1, errno, "close r"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (system("ip link set dev lo mtu 1500")) + error(1, errno, "ip link set mtu"); + if (system("ip addr add dev lo 172.17.0.1/24")) + error(1, errno, "ip addr add"); + + run_test(); + + fprintf(stderr, "OK\n\n"); + return 0; +} diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh new file mode 100755 index 000000000000..6331d91b86a6 --- /dev/null +++ b/tools/testing/selftests/net/psock_snd.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of packet socket send regression tests + +set -e + +readonly mtu=1500 +readonly iphlen=20 +readonly udphlen=8 + +readonly vnet_hlen=10 +readonly eth_hlen=14 + +readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))" +readonly mss_exceeds="$((${mss} + 1))" + +readonly max_mtu=65535 +readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))" +readonly max_mss_exceeds="$((${max_mss} + 1))" + +# functional checks (not a full cross-product) + +echo "dgram" +./in_netns.sh ./psock_snd -d + +echo "dgram bind" +./in_netns.sh ./psock_snd -d -b + +echo "raw" +./in_netns.sh ./psock_snd + +echo "raw bind" +./in_netns.sh ./psock_snd -b + +echo "raw qdisc bypass" +./in_netns.sh ./psock_snd -q + +echo "raw vlan" +./in_netns.sh ./psock_snd -V + +echo "raw vnet hdr" +./in_netns.sh ./psock_snd -v + +echo "raw csum_off" +./in_netns.sh ./psock_snd -v -c + +echo "raw csum_off with bad offset (fails)" +(! ./in_netns.sh ./psock_snd -v -c -C) + + +# bounds check: send {max, max + 1, min, min - 1} lengths + +echo "raw min size" +./in_netns.sh ./psock_snd -l 0 + +echo "raw mtu size" +./in_netns.sh ./psock_snd -l "${mss}" + +echo "raw mtu size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}") + +# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed +# +# echo "raw vlan mtu size" +# ./in_netns.sh ./psock_snd -V -l "${mss}" + +echo "raw vlan mtu size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}") + +echo "dgram mtu size" +./in_netns.sh ./psock_snd -d -l "${mss}" + +echo "dgram mtu size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}") + +echo "raw truncate hlen (fails: does not arrive)" +(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))") + +echo "raw truncate hlen - 1 (fails: EINVAL)" +(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))") + + +# gso checks: implies -l, because with gso len must exceed gso_size + +echo "raw gso min size" +./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" + +echo "raw gso min size - 1 (fails)" +(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") + +echo "raw gso max size" +./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" + +echo "raw gso max size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}") + +echo "OK. All tests passed" diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 7f6cd9fdacf3..7ec4fa4d55dc 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -60,6 +60,8 @@ #include "psock_lib.h" +#include "../kselftest.h" + #ifndef bug_on # define bug_on(cond) assert(!(cond)) #endif @@ -825,7 +827,7 @@ static int test_tpacket(int version, int type) fprintf(stderr, "test: skip %s %s since user and kernel " "space have different bit width\n", tpacket_str[version], type_str[type]); - return 0; + return KSFT_SKIP; } sock = pfsocket(version); diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index e6f485235435..0d7a44fa30af 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -7,6 +7,9 @@ devdummy="test-dummy0" ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # set global exit status, but never reset nonzero one. check_err() { @@ -333,7 +336,7 @@ kci_test_vrf() ip link show type vrf 2>/dev/null if [ $? -ne 0 ]; then echo "SKIP: vrf: iproute2 too old" - return 0 + return $ksft_skip fi ip link add "$vrfname" type vrf table 10 @@ -409,7 +412,7 @@ kci_test_encap_fou() ip fou help 2>&1 |grep -q 'Usage: ip fou' if [ $? -ne 0 ];then echo "SKIP: fou: iproute2 too old" - return 1 + return $ksft_skip fi ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null @@ -444,7 +447,7 @@ kci_test_encap() ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP encap tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi ip netns exec "$testns" ip link set lo up @@ -469,7 +472,7 @@ kci_test_macsec() ip macsec help 2>&1 | grep -q "^Usage: ip macsec" if [ $? -ne 0 ]; then echo "SKIP: macsec: iproute2 too old" - return 0 + return $ksft_skip fi ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on @@ -502,6 +505,108 @@ kci_test_macsec() echo "PASS: macsec" } +#------------------------------------------------------------------- +# Example commands +# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ +# spi 0x07 mode transport reqid 0x07 replay-window 32 \ +# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \ +# sel src 14.0.0.52/24 dst 14.0.0.70/24 +# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \ +# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \ +# spi 0x07 mode transport reqid 0x07 +# +# Subcommands not tested +# ip x s update +# ip x s allocspi +# ip x s deleteall +# ip x p update +# ip x p deleteall +# ip x p set +#------------------------------------------------------------------- +kci_test_ipsec() +{ + srcip="14.0.0.52" + dstip="14.0.0.70" + algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128" + + # flush to be sure there's nothing configured + ip x s flush ; ip x p flush + check_err $? + + # start the monitor in the background + tmpfile=`mktemp ipsectestXXX` + ip x m > $tmpfile & + mpid=$! + sleep 0.2 + + ipsecid="proto esp src $srcip dst $dstip spi 0x07" + ip x s add $ipsecid \ + mode transport reqid 0x07 replay-window 32 \ + $algo sel src $srcip/24 dst $dstip/24 + check_err $? + + lines=`ip x s list | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x s count | grep -q "SAD count 1" + check_err $? + + lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x s delete $ipsecid + check_err $? + + lines=`ip x s list | wc -l` + test $lines -eq 0 + check_err $? + + ipsecsel="dir out src $srcip/24 dst $dstip/24" + ip x p add $ipsecsel \ + tmpl proto esp src $srcip dst $dstip \ + spi 0x07 mode transport reqid 0x07 + check_err $? + + lines=`ip x p list | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0" + check_err $? + + lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x p delete $ipsecsel + check_err $? + + lines=`ip x p list | wc -l` + test $lines -eq 0 + check_err $? + + # check the monitor results + kill $mpid + lines=`wc -l $tmpfile | cut "-d " -f1` + test $lines -eq 20 + check_err $? + rm -rf $tmpfile + + # clean up any leftovers + ip x s flush + check_err $? + ip x p flush + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ipsec" + return 1 + fi + echo "PASS: ipsec" +} + kci_test_gretap() { testns="testns" @@ -511,14 +616,14 @@ kci_test_gretap() ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP gretap tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi ip link help gretap 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: gretap: iproute2 too old" ip netns del "$testns" - return 1 + return $ksft_skip fi # test native tunnel @@ -561,14 +666,14 @@ kci_test_ip6gretap() ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP ip6gretap tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi ip link help ip6gretap 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: ip6gretap: iproute2 too old" ip netns del "$testns" - return 1 + return $ksft_skip fi # test native tunnel @@ -611,13 +716,13 @@ kci_test_erspan() ip link help erspan 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: erspan: iproute2 too old" - return 1 + return $ksft_skip fi ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP erspan tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi # test native tunnel erspan v1 @@ -676,13 +781,13 @@ kci_test_ip6erspan() ip link help ip6erspan 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: ip6erspan: iproute2 too old" - return 1 + return $ksft_skip fi ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP ip6erspan tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi # test native tunnel ip6erspan v1 @@ -755,6 +860,7 @@ kci_test_rtnl() kci_test_vrf kci_test_encap kci_test_macsec + kci_test_ipsec kci_del_dummy } @@ -762,14 +868,14 @@ kci_test_rtnl() #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip fi for x in ip tc;do $x -Version 2>/dev/null >/dev/null if [ $? -ne 0 ];then echo "SKIP: Could not run test without the $x tool" - exit 0 + exit $ksft_skip fi done diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c new file mode 100644 index 000000000000..d044b29ddabc --- /dev/null +++ b/tools/testing/selftests/net/tcp_inq.c @@ -0,0 +1,189 @@ +/* + * Copyright 2018 Google Inc. + * Author: Soheil Hassas Yeganeh (soheil@google.com) + * + * Simple example on how to use TCP_INQ and TCP_CM_INQ. + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + */ +#define _GNU_SOURCE + +#include <error.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <pthread.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> + +#ifndef TCP_INQ +#define TCP_INQ 36 +#endif + +#ifndef TCP_CM_INQ +#define TCP_CM_INQ TCP_INQ +#endif + +#define BUF_SIZE 8192 +#define CMSG_SIZE 32 + +static int family = AF_INET6; +static socklen_t addr_len = sizeof(struct sockaddr_in6); +static int port = 4974; + +static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (family) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr4->sin_port = htons(port); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_loopback; + addr6->sin6_port = htons(port); + break; + default: + error(1, 0, "illegal family"); + } +} + +void *start_server(void *arg) +{ + int server_fd = (int)(unsigned long)arg; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + char *buf; + int fd; + int r; + + buf = malloc(BUF_SIZE); + + for (;;) { + fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); + if (fd == -1) { + perror("accept"); + break; + } + do { + r = send(fd, buf, BUF_SIZE, 0); + } while (r < 0 && errno == EINTR); + if (r < 0) + perror("send"); + if (r != BUF_SIZE) + fprintf(stderr, "can only send %d bytes\n", r); + /* TCP_INQ can overestimate in-queue by one byte if we send + * the FIN packet. Sleep for 1 second, so that the client + * likely invoked recvmsg(). + */ + sleep(1); + close(fd); + } + + free(buf); + close(server_fd); + pthread_exit(0); +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage listen_addr, addr; + int c, one = 1, inq = -1; + pthread_t server_thread; + char cmsgbuf[CMSG_SIZE]; + struct iovec iov[1]; + struct cmsghdr *cm; + struct msghdr msg; + int server_fd, fd; + char *buf; + + while ((c = getopt(argc, argv, "46p:")) != -1) { + switch (c) { + case '4': + family = PF_INET; + addr_len = sizeof(struct sockaddr_in); + break; + case '6': + family = PF_INET6; + addr_len = sizeof(struct sockaddr_in6); + break; + case 'p': + port = atoi(optarg); + break; + } + } + + server_fd = socket(family, SOCK_STREAM, 0); + if (server_fd < 0) + error(1, errno, "server socket"); + setup_loopback_addr(family, &listen_addr); + if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, + &one, sizeof(one)) != 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + if (bind(server_fd, (const struct sockaddr *)&listen_addr, + addr_len) == -1) + error(1, errno, "bind"); + if (listen(server_fd, 128) == -1) + error(1, errno, "listen"); + if (pthread_create(&server_thread, NULL, start_server, + (void *)(unsigned long)server_fd) != 0) + error(1, errno, "pthread_create"); + + fd = socket(family, SOCK_STREAM, 0); + if (fd < 0) + error(1, errno, "client socket"); + setup_loopback_addr(family, &addr); + if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1) + error(1, errno, "connect"); + if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0) + error(1, errno, "setsockopt(TCP_INQ)"); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + msg.msg_flags = 0; + + buf = malloc(BUF_SIZE); + iov[0].iov_base = buf; + iov[0].iov_len = BUF_SIZE / 2; + + if (recvmsg(fd, &msg, 0) != iov[0].iov_len) + error(1, errno, "recvmsg"); + if (msg.msg_flags & MSG_CTRUNC) + error(1, 0, "control message is truncated"); + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) + if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ) + inq = *((int *) CMSG_DATA(cm)); + + if (inq != BUF_SIZE - iov[0].iov_len) { + fprintf(stderr, "unexpected inq: %d\n", inq); + exit(1); + } + + printf("PASSED\n"); + free(buf); + close(fd); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c new file mode 100644 index 000000000000..77f762780199 --- /dev/null +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -0,0 +1,447 @@ +/* + * Copyright 2018 Google Inc. + * Author: Eric Dumazet (edumazet@google.com) + * + * Reference program demonstrating tcp mmap() usage, + * and SO_RCVLOWAT hints for receiver. + * + * Note : NIC with header split is needed to use mmap() on TCP : + * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload. + * + * How to use on loopback interface : + * + * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header) + * tcp_mmap -s -z & + * tcp_mmap -H ::1 -z + * + * Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12) + * (4096 : page size on x86, 12: TCP TS option length) + * tcp_mmap -s -z -M $((4096+12)) & + * tcp_mmap -H ::1 -z -M $((4096+12)) + * + * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface. + * We might use sendfile() instead, but really this test program is about mmap(), for receivers ;) + * + * $ ./tcp_mmap -s & # Without mmap() + * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done + * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit + * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches + * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit + * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches + * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit + * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches + * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit + * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches + * $ kill %1 # kill tcp_mmap server + * + * $ ./tcp_mmap -s -z & # With mmap() + * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done + * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit + * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit + * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit + * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit + * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#define _GNU_SOURCE +#include <pthread.h> +#include <sys/types.h> +#include <fcntl.h> +#include <error.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <unistd.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <poll.h> +#include <linux/tcp.h> +#include <assert.h> + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +#define FILE_SZ (1UL << 35) +static int cfg_family = AF_INET6; +static socklen_t cfg_alen = sizeof(struct sockaddr_in6); +static int cfg_port = 8787; + +static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */ +static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */ +static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */ +static int xflg; /* hash received data (simple xor) (-h option) */ +static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ + +static int chunk_size = 512*1024; + +unsigned long htotal; + +static inline void prefetch(const void *x) +{ +#if defined(__x86_64__) + asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x)); +#endif +} + +void hash_zone(void *zone, unsigned int length) +{ + unsigned long temp = htotal; + + while (length >= 8*sizeof(long)) { + prefetch(zone + 384); + temp ^= *(unsigned long *)zone; + temp ^= *(unsigned long *)(zone + sizeof(long)); + temp ^= *(unsigned long *)(zone + 2*sizeof(long)); + temp ^= *(unsigned long *)(zone + 3*sizeof(long)); + temp ^= *(unsigned long *)(zone + 4*sizeof(long)); + temp ^= *(unsigned long *)(zone + 5*sizeof(long)); + temp ^= *(unsigned long *)(zone + 6*sizeof(long)); + temp ^= *(unsigned long *)(zone + 7*sizeof(long)); + zone += 8*sizeof(long); + length -= 8*sizeof(long); + } + while (length >= 1) { + temp ^= *(unsigned char *)zone; + zone += 1; + length--; + } + htotal = temp; +} + +void *child_thread(void *arg) +{ + unsigned long total_mmap = 0, total = 0; + struct tcp_zerocopy_receive zc; + unsigned long delta_usec; + int flags = MAP_SHARED; + struct timeval t0, t1; + char *buffer = NULL; + void *addr = NULL; + double throughput; + struct rusage ru; + int lu, fd; + + fd = (int)(unsigned long)arg; + + gettimeofday(&t0, NULL); + + fcntl(fd, F_SETFL, O_NDELAY); + buffer = malloc(chunk_size); + if (!buffer) { + perror("malloc"); + goto error; + } + if (zflg) { + addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0); + if (addr == (void *)-1) + zflg = 0; + } + while (1) { + struct pollfd pfd = { .fd = fd, .events = POLLIN, }; + int sub; + + poll(&pfd, 1, 10000); + if (zflg) { + socklen_t zc_len = sizeof(zc); + int res; + + zc.address = (__u64)addr; + zc.length = chunk_size; + zc.recv_skip_hint = 0; + res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, + &zc, &zc_len); + if (res == -1) + break; + + if (zc.length) { + assert(zc.length <= chunk_size); + total_mmap += zc.length; + if (xflg) + hash_zone(addr, zc.length); + total += zc.length; + } + if (zc.recv_skip_hint) { + assert(zc.recv_skip_hint <= chunk_size); + lu = read(fd, buffer, zc.recv_skip_hint); + if (lu > 0) { + if (xflg) + hash_zone(buffer, lu); + total += lu; + } + } + continue; + } + sub = 0; + while (sub < chunk_size) { + lu = read(fd, buffer + sub, chunk_size - sub); + if (lu == 0) + goto end; + if (lu < 0) + break; + if (xflg) + hash_zone(buffer + sub, lu); + total += lu; + sub += lu; + } + } +end: + gettimeofday(&t1, NULL); + delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; + + throughput = 0; + if (delta_usec) + throughput = total * 8.0 / (double)delta_usec / 1000.0; + getrusage(RUSAGE_THREAD, &ru); + if (total > 1024*1024) { + unsigned long total_usec; + unsigned long mb = total >> 20; + total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec + + 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec; + printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n" + " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n", + total / (1024.0 * 1024.0), + 100.0*total_mmap/total, + (double)delta_usec / 1000000.0, + throughput, + (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0, + (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0, + (double)total_usec/mb, + ru.ru_nvcsw); + } +error: + free(buffer); + close(fd); + if (zflg) + munmap(addr, chunk_size); + pthread_exit(0); +} + +static void apply_rcvsnd_buf(int fd) +{ + if (rcvbuf && setsockopt(fd, SOL_SOCKET, + SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) { + perror("setsockopt SO_RCVBUF"); + } + + if (sndbuf && setsockopt(fd, SOL_SOCKET, + SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) { + perror("setsockopt SO_SNDBUF"); + } +} + + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static void do_accept(int fdlisten) +{ + if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT, + &chunk_size, sizeof(chunk_size)) == -1) { + perror("setsockopt SO_RCVLOWAT"); + } + + apply_rcvsnd_buf(fdlisten); + + while (1) { + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + pthread_t th; + int fd, res; + + fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen); + if (fd == -1) { + perror("accept"); + continue; + } + res = pthread_create(&th, NULL, child_thread, + (void *)(unsigned long)fd); + if (res) { + errno = res; + perror("pthread_create"); + close(fd); + } + } +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage listenaddr, addr; + unsigned int max_pacing_rate = 0; + unsigned long total = 0; + char *host = NULL; + int fd, c, on = 1; + char *buffer; + int sflg = 0; + int mss = 0; + + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'p': + cfg_port = atoi(optarg); + break; + case 'H': + host = optarg; + break; + case 's': /* server : listen for incoming connections */ + sflg++; + break; + case 'r': + rcvbuf = atoi(optarg); + break; + case 'w': + sndbuf = atoi(optarg); + break; + case 'z': + zflg = 1; + break; + case 'M': + mss = atoi(optarg); + break; + case 'x': + xflg = 1; + break; + case 'k': + keepflag = 1; + break; + case 'P': + max_pacing_rate = atoi(optarg) ; + break; + default: + exit(1); + } + } + if (sflg) { + int fdlisten = socket(cfg_family, SOCK_STREAM, 0); + + if (fdlisten == -1) { + perror("socket"); + exit(1); + } + apply_rcvsnd_buf(fdlisten); + setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + + setup_sockaddr(cfg_family, host, &listenaddr); + + if (mss && + setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG, + &mss, sizeof(mss)) == -1) { + perror("setsockopt TCP_MAXSEG"); + exit(1); + } + if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) { + perror("bind"); + exit(1); + } + if (listen(fdlisten, 128) == -1) { + perror("listen"); + exit(1); + } + do_accept(fdlisten); + } + buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buffer == (char *)-1) { + perror("mmap"); + exit(1); + } + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) { + perror("socket"); + exit(1); + } + apply_rcvsnd_buf(fd); + + setup_sockaddr(cfg_family, host, &addr); + + if (mss && + setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) { + perror("setsockopt TCP_MAXSEG"); + exit(1); + } + if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) { + perror("connect"); + exit(1); + } + if (max_pacing_rate && + setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE, + &max_pacing_rate, sizeof(max_pacing_rate)) == -1) + perror("setsockopt SO_MAX_PACING_RATE"); + + if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, + &on, sizeof(on)) == -1) { + perror("setsockopt SO_ZEROCOPY, (-z option disabled)"); + zflg = 0; + } + while (total < FILE_SZ) { + long wr = FILE_SZ - total; + + if (wr > chunk_size) + wr = chunk_size; + /* Note : we just want to fill the pipe with 0 bytes */ + wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0); + if (wr <= 0) + break; + total += wr; + } + close(fd); + munmap(buffer, chunk_size); + return 0; +} diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c new file mode 100644 index 000000000000..e279051bc631 --- /dev/null +++ b/tools/testing/selftests/net/udpgso.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <stddef.h> +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <net/if.h> +#include <linux/in.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef ETH_MAX_MTU +#define ETH_MAX_MTU 0xFFFFU +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 +#endif + +#ifndef UDP_MAX_SEGMENTS +#define UDP_MAX_SEGMENTS (1 << 6UL) +#endif + +#define CONST_MTU_TEST 1500 + +#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr)) +#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr)) + +#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4) +#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6) + +#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4) +#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6) + +static bool cfg_do_ipv4; +static bool cfg_do_ipv6; +static bool cfg_do_connected; +static bool cfg_do_connectionless; +static bool cfg_do_msgmore; +static bool cfg_do_setsockopt; +static int cfg_specific_test_id = -1; + +static const char cfg_ifname[] = "lo"; +static unsigned short cfg_port = 9000; + +static char buf[ETH_MAX_MTU]; + +struct testcase { + int tlen; /* send() buffer size, may exceed mss */ + bool tfail; /* send() call is expected to fail */ + int gso_len; /* mss after applying gso */ + int r_num_mss; /* recv(): number of calls of full mss */ + int r_len_last; /* recv(): size of last non-mss dgram, if any */ +}; + +const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; +const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; + +struct testcase testcases_v4[] = { + { + /* no GSO: send a single byte */ + .tlen = 1, + .r_len_last = 1, + }, + { + /* no GSO: send a single MSS */ + .tlen = CONST_MSS_V4, + .r_num_mss = 1, + }, + { + /* no GSO: send a single MSS + 1B: fail */ + .tlen = CONST_MSS_V4 + 1, + .tfail = true, + }, + { + /* send a single MSS: will fail with GSO, because the segment + * logic in udp4_ufo_fragment demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V4 * 2, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V4 * 2) + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4), + }, + + { + /* send MAX bytes */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = CONST_MAX_SEGS_V4, + .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 - + (CONST_MAX_SEGS_V4 * CONST_MSS_V4), + }, + { + /* send MAX + 1: fail */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1, + .gso_len = CONST_MSS_V4, + .tfail = true, + }, + { + /* send a single 1B MSS: will fail, see single MSS above */ + .tlen = 1, + .gso_len = 1, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send 2 1B segments */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + }, + { + /* send 2B + 2B + 1B segments */ + .tlen = 5, + .gso_len = 2, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send max number of min sized segments */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .gso_len = 1, + .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + }, + { + /* send max number + 1 of min sized segments: fail */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .gso_len = 1, + .tfail = true, + }, + { + /* EOL */ + } +}; + +#ifndef IP6_MAX_MTU +#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr)) +#endif + +struct testcase testcases_v6[] = { + { + /* no GSO: send a single byte */ + .tlen = 1, + .r_len_last = 1, + }, + { + /* no GSO: send a single MSS */ + .tlen = CONST_MSS_V6, + .r_num_mss = 1, + }, + { + /* no GSO: send a single MSS + 1B: fail */ + .tlen = CONST_MSS_V6 + 1, + .tfail = true, + }, + { + /* send a single MSS: will fail with GSO, because the segment + * logic in udp4_ufo_fragment demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V6 * 2, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V6 * 2) + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6), + }, + + { + /* send MAX bytes */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = CONST_MAX_SEGS_V6, + .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 - + (CONST_MAX_SEGS_V6 * CONST_MSS_V6), + }, + { + /* send MAX + 1: fail */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1, + .gso_len = CONST_MSS_V6, + .tfail = true, + }, + { + /* send a single 1B MSS: will fail, see single MSS above */ + .tlen = 1, + .gso_len = 1, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send 2 1B segments */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + }, + { + /* send 2B + 2B + 1B segments */ + .tlen = 5, + .gso_len = 2, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send max number of min sized segments */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .gso_len = 1, + .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + }, + { + /* send max number + 1 of min sized segments: fail */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .gso_len = 1, + .tfail = true, + }, + { + /* EOL */ + } +}; + +static unsigned int get_device_mtu(int fd, const char *ifname) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCGIFMTU, &ifr)) + error(1, errno, "ioctl get mtu"); + + return ifr.ifr_mtu; +} + +static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_mtu = mtu; + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCSIFMTU, &ifr)) + error(1, errno, "ioctl set mtu"); +} + +static void set_device_mtu(int fd, int mtu) +{ + int val; + + val = get_device_mtu(fd, cfg_ifname); + fprintf(stderr, "device mtu (orig): %u\n", val); + + __set_device_mtu(fd, cfg_ifname, mtu); + val = get_device_mtu(fd, cfg_ifname); + if (val != mtu) + error(1, 0, "unable to set device mtu to %u\n", val); + + fprintf(stderr, "device mtu (test): %u\n", val); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +static unsigned int get_path_mtu(int fd, bool is_ipv4) +{ + socklen_t vallen; + unsigned int mtu; + int ret; + + vallen = sizeof(mtu); + if (is_ipv4) + ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen); + else + ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen); + + if (ret) + error(1, errno, "getsockopt mtu"); + + + fprintf(stderr, "path mtu (read): %u\n", mtu); + return mtu; +} + +/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ +static void set_route_mtu(int mtu, bool is_ipv4) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct nlmsghdr *nh; + struct rtattr *rta; + struct rtmsg *rt; + char data[NLMSG_ALIGN(sizeof(*nh)) + + NLMSG_ALIGN(sizeof(*rt)) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + + NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; + int fd, ret, alen, off = 0; + + alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) + error(1, errno, "socket netlink"); + + memset(data, 0, sizeof(data)); + + nh = (void *)data; + nh->nlmsg_type = RTM_NEWROUTE; + nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; + off += NLMSG_ALIGN(sizeof(*nh)); + + rt = (void *)(data + off); + rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; + rt->rtm_table = RT_TABLE_MAIN; + rt->rtm_dst_len = alen << 3; + rt->rtm_protocol = RTPROT_BOOT; + rt->rtm_scope = RT_SCOPE_UNIVERSE; + rt->rtm_type = RTN_UNICAST; + off += NLMSG_ALIGN(sizeof(*rt)); + + rta = (void *)(data + off); + rta->rta_type = RTA_DST; + rta->rta_len = RTA_LENGTH(alen); + if (is_ipv4) + memcpy(RTA_DATA(rta), &addr4, alen); + else + memcpy(RTA_DATA(rta), &addr6, alen); + off += NLMSG_ALIGN(rta->rta_len); + + rta = (void *)(data + off); + rta->rta_type = RTA_OIF; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); + off += NLMSG_ALIGN(rta->rta_len); + + /* MTU is a subtype in a metrics type */ + rta = (void *)(data + off); + rta->rta_type = RTA_METRICS; + rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); + off += NLMSG_ALIGN(rta->rta_len); + + /* now fill MTU subtype. Note that it fits within above rta_len */ + rta = (void *)(((char *) rta) + RTA_LENGTH(0)); + rta->rta_type = RTAX_MTU; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = mtu; + + nh->nlmsg_len = off; + + ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); + if (ret != off) + error(1, errno, "send netlink: %uB != %uB\n", ret, off); + + if (close(fd)) + error(1, errno, "close netlink"); + + fprintf(stderr, "route mtu (test): %u\n", mtu); +} + +static bool __send_one(int fd, struct msghdr *msg, int flags) +{ + int ret; + + ret = sendmsg(fd, msg, flags); + if (ret == -1 && + (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL)) + return false; + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != msg->msg_iov->iov_len) + error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len); + if (msg->msg_flags) + error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags); + + return true; +} + +static bool send_one(int fd, int len, int gso_len, + struct sockaddr *addr, socklen_t alen) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_name = addr; + msg.msg_namelen = alen; + + if (gso_len && !cfg_do_setsockopt) { + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(uint16_t)); + *((uint16_t *) CMSG_DATA(cm)) = gso_len; + } + + /* If MSG_MORE, send 1 byte followed by remainder */ + if (cfg_do_msgmore && len > 1) { + iov.iov_len = 1; + if (!__send_one(fd, &msg, MSG_MORE)) + error(1, 0, "send 1B failed"); + + iov.iov_base++; + iov.iov_len = len - 1; + } + + return __send_one(fd, &msg, 0); +} + +static int recv_one(int fd, int flags) +{ + int ret; + + ret = recv(fd, buf, sizeof(buf), flags); + if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT)) + return 0; + if (ret == -1) + error(1, errno, "recv"); + + return ret; +} + +static void run_one(struct testcase *test, int fdt, int fdr, + struct sockaddr *addr, socklen_t alen) +{ + int i, ret, val, mss; + bool sent; + + fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + addr->sa_family == AF_INET ? 4 : 6, + test->tlen, test->gso_len, + test->tfail ? "(fail)" : ""); + + val = test->gso_len; + if (cfg_do_setsockopt) { + if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) + error(1, errno, "setsockopt udp segment"); + } + + sent = send_one(fdt, test->tlen, test->gso_len, addr, alen); + if (sent && test->tfail) + error(1, 0, "send succeeded while expecting failure"); + if (!sent && !test->tfail) + error(1, 0, "send failed while expecting success"); + if (!sent) + return; + + if (test->gso_len) + mss = test->gso_len; + else + mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6; + + + /* Recv all full MSS datagrams */ + for (i = 0; i < test->r_num_mss; i++) { + ret = recv_one(fdr, 0); + if (ret != mss) + error(1, 0, "recv.%d: %d != %d", i, ret, mss); + } + + /* Recv the non-full last datagram, if tlen was not a multiple of mss */ + if (test->r_len_last) { + ret = recv_one(fdr, 0); + if (ret != test->r_len_last) + error(1, 0, "recv.%d: %d != %d (last)", + i, ret, test->r_len_last); + } + + /* Verify received all data */ + ret = recv_one(fdr, MSG_DONTWAIT); + if (ret) + error(1, 0, "recv: unexpected datagram"); +} + +static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen) +{ + struct testcase *tests, *test; + + tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6; + + for (test = tests; test->tlen; test++) { + /* if a specific test is given, then skip all others */ + if (cfg_specific_test_id == -1 || + cfg_specific_test_id == test - tests) + run_one(test, fdt, fdr, addr, alen); + } +} + +static void run_test(struct sockaddr *addr, socklen_t alen) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + int fdr, fdt, val; + + fdr = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdr == -1) + error(1, errno, "socket r"); + + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + + /* Have tests fail quickly instead of hang */ + if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + fdt = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + /* Do not fragment these datagrams: only succeed if GSO works */ + set_pmtu_discover(fdt, addr->sa_family == AF_INET); + + if (cfg_do_connectionless) { + set_device_mtu(fdt, CONST_MTU_TEST); + run_all(fdt, fdr, addr, alen); + } + + if (cfg_do_connected) { + set_device_mtu(fdt, CONST_MTU_TEST + 100); + set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); + + if (connect(fdt, addr, alen)) + error(1, errno, "connect"); + + val = get_path_mtu(fdt, addr->sa_family == AF_INET); + if (val != CONST_MTU_TEST) + error(1, 0, "bad path mtu %u\n", val); + + run_all(fdt, fdr, addr, 0 /* use connected addr */); + } + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); +} + +static void run_test_v4(void) +{ + struct sockaddr_in addr = {0}; + + addr.sin_family = AF_INET; + addr.sin_port = htons(cfg_port); + addr.sin_addr = addr4; + + run_test((void *)&addr, sizeof(addr)); +} + +static void run_test_v6(void) +{ + struct sockaddr_in6 addr = {0}; + + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = addr6; + + run_test((void *)&addr, sizeof(addr)); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + switch (c) { + case '4': + cfg_do_ipv4 = true; + break; + case '6': + cfg_do_ipv6 = true; + break; + case 'c': + cfg_do_connected = true; + break; + case 'C': + cfg_do_connectionless = true; + break; + case 'm': + cfg_do_msgmore = true; + break; + case 's': + cfg_do_setsockopt = true; + break; + case 't': + cfg_specific_test_id = strtoul(optarg, NULL, 0); + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_do_ipv4) + run_test_v4(); + if (cfg_do_ipv6) + run_test_v6(); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh new file mode 100755 index 000000000000..fec24f584fe9 --- /dev/null +++ b/tools/testing/selftests/net/udpgso.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgso regression tests + +echo "ipv4 cmsg" +./in_netns.sh ./udpgso -4 -C + +echo "ipv4 setsockopt" +./in_netns.sh ./udpgso -4 -C -s + +echo "ipv6 cmsg" +./in_netns.sh ./udpgso -6 -C + +echo "ipv6 setsockopt" +./in_netns.sh ./udpgso -6 -C -s + +echo "ipv4 connected" +./in_netns.sh ./udpgso -4 -c + +# blocked on 2nd loopback address +# echo "ipv6 connected" +# ./in_netns.sh ./udpgso -6 -c + +echo "ipv4 msg_more" +./in_netns.sh ./udpgso -4 -C -m + +echo "ipv6 msg_more" +./in_netns.sh ./udpgso -6 -C -m diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh new file mode 100755 index 000000000000..850767befa47 --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgso benchmarks + +wake_children() { + local -r jobs="$(jobs -p)" + + if [[ "${jobs}" != "" ]]; then + kill -1 ${jobs} 2>/dev/null + fi +} +trap wake_children EXIT + +run_one() { + local -r args=$@ + + ./udpgso_bench_rx & + ./udpgso_bench_rx -t & + + ./udpgso_bench_tx ${args} +} + +run_in_netns() { + local -r args=$@ + + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp" + run_in_netns ${args} + + echo "udp gso" + run_in_netns ${args} -S +} + +run_tcp() { + local -r args=$@ + + echo "tcp" + run_in_netns ${args} -t + + echo "tcp zerocopy" + run_in_netns ${args} -t -z +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 127.0.0.1" + local -r ipv6_args="${core_args} -6 -D ::1" + + echo "ipv4" + run_tcp "${ipv4_args}" + run_udp "${ipv4_args}" + + echo "ipv6" + run_tcp "${ipv4_args}" + run_udp "${ipv6_args}" +} + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c new file mode 100644 index 000000000000..727cf67a3f75 --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <limits.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +static int cfg_port = 8000; +static bool cfg_tcp; +static bool cfg_verify; + +static bool interrupted; +static unsigned long packets, bytes; + +static void sigint_handler(int signum) +{ + if (signum == SIGINT) + interrupted = true; +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.events = POLLIN; + pfd.revents = 0; + pfd.fd = fd; + + do { + ret = poll(&pfd, 1, 10); + if (ret == -1) + error(1, errno, "poll"); + if (ret == 0) + continue; + if (pfd.revents != POLLIN) + error(1, errno, "poll: 0x%x expected 0x%x\n", + pfd.revents, POLLIN); + } while (!ret && !interrupted); +} + +static int do_socket(bool do_tcp) +{ + struct sockaddr_in6 addr = {0}; + int fd, val; + + fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + val = 1 << 21; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + val = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) + error(1, errno, "setsockopt reuseport"); + + addr.sin6_family = PF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = in6addr_any; + if (bind(fd, (void *) &addr, sizeof(addr))) + error(1, errno, "bind"); + + if (do_tcp) { + int accept_fd = fd; + + if (listen(accept_fd, 1)) + error(1, errno, "listen"); + + do_poll(accept_fd); + + fd = accept(accept_fd, NULL, NULL); + if (fd == -1) + error(1, errno, "accept"); + if (close(accept_fd)) + error(1, errno, "close accept fd"); + } + + return fd; +} + +/* Flush all outstanding bytes for the tcp receive queue */ +static void do_flush_tcp(int fd) +{ + int ret; + + while (true) { + /* MSG_TRUNC flushes up to len bytes */ + ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + return; + if (ret == -1) + error(1, errno, "flush"); + if (ret == 0) { + /* client detached */ + exit(0); + } + + packets++; + bytes += ret; + } + +} + +static char sanitized_char(char val) +{ + return (val >= 'a' && val <= 'z') ? val : '.'; +} + +static void do_verify_udp(const char *data, int len) +{ + char cur = data[0]; + int i; + + /* verify contents */ + if (cur < 'a' || cur > 'z') + error(1, 0, "data initial byte out of range"); + + for (i = 1; i < len; i++) { + if (cur == 'z') + cur = 'a'; + else + cur++; + + if (data[i] != cur) + error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n", + i, len, + sanitized_char(data[i]), data[i], + sanitized_char(cur), cur); + } +} + +/* Flush all outstanding datagrams. Verify first few bytes of each. */ +static void do_flush_udp(int fd) +{ + static char rbuf[ETH_DATA_LEN]; + int ret, len, budget = 256; + + len = cfg_verify ? sizeof(rbuf) : 0; + while (budget--) { + /* MSG_TRUNC will make return value full datagram length */ + ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + return; + if (ret == -1) + error(1, errno, "recv"); + if (len) { + if (ret == 0) + error(1, errno, "recv: 0 byte datagram\n"); + + do_verify_udp(rbuf, ret); + } + + packets++; + bytes += ret; + } +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-tv] [-p port]", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "ptv")) != -1) { + switch (c) { + case 'p': + cfg_port = htons(strtoul(optarg, NULL, 0)); + break; + case 't': + cfg_tcp = true; + break; + case 'v': + cfg_verify = true; + break; + } + } + + if (optind != argc) + usage(argv[0]); + + if (cfg_tcp && cfg_verify) + error(1, 0, "TODO: implement verify mode for tcp"); +} + +static void do_recv(void) +{ + unsigned long tnow, treport; + int fd; + + fd = do_socket(cfg_tcp); + + treport = gettimeofday_ms() + 1000; + do { + do_poll(fd); + + if (cfg_tcp) + do_flush_tcp(fd); + else + do_flush_udp(fd); + + tnow = gettimeofday_ms(); + if (tnow > treport) { + if (packets) + fprintf(stderr, + "%s rx: %6lu MB/s %8lu calls/s\n", + cfg_tcp ? "tcp" : "udp", + bytes >> 20, packets); + bytes = packets = 0; + treport = tnow + 1000; + } + + } while (!interrupted); + + if (close(fd)) + error(1, errno, "close"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + signal(SIGINT, sigint_handler); + + do_recv(); + + return 0; +} diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c new file mode 100644 index 000000000000..e821564053cf --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef ETH_MAX_MTU +#define ETH_MAX_MTU 0xFFFFU +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 +#endif + +#ifndef SO_ZEROCOPY +#define SO_ZEROCOPY 60 +#endif + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +#define NUM_PKT 100 + +static bool cfg_cache_trash; +static int cfg_cpu = -1; +static int cfg_connected = true; +static int cfg_family = PF_UNSPEC; +static uint16_t cfg_mss; +static int cfg_payload_len = (1472 * 42); +static int cfg_port = 8000; +static int cfg_runtime_ms = -1; +static bool cfg_segment; +static bool cfg_sendmmsg; +static bool cfg_tcp; +static bool cfg_zerocopy; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; + +static bool interrupted; +static char buf[NUM_PKT][ETH_MAX_MTU]; + +static void sigint_handler(int signum) +{ + if (signum == SIGINT) + interrupted = true; +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int set_cpu(int cpu) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + error(1, 0, "setaffinity %d", cpu); + + return 0; +} + +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static void flush_zerocopy(int fd) +{ + struct msghdr msg = {0}; /* flush */ + int ret; + + while (1) { + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "errqueue"); + if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC)) + error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); + msg.msg_flags = 0; + } +} + +static int send_tcp(int fd, char *data) +{ + int ret, done = 0, count = 0; + + while (done < cfg_payload_len) { + ret = send(fd, data + done, cfg_payload_len - done, + cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "write"); + + done += ret; + count++; + } + + return count; +} + +static int send_udp(int fd, char *data) +{ + int ret, total_len, len, count = 0; + + total_len = cfg_payload_len; + + while (total_len) { + len = total_len < cfg_mss ? total_len : cfg_mss; + + ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, + cfg_connected ? NULL : (void *)&cfg_dst_addr, + cfg_connected ? 0 : cfg_alen); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, errno, "write: %uB != %uB\n", ret, len); + + total_len -= len; + count++; + } + + return count; +} + +static int send_udp_sendmmsg(int fd, char *data) +{ + const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN; + struct mmsghdr mmsgs[max_nr_msg]; + struct iovec iov[max_nr_msg]; + unsigned int off = 0, left; + int i = 0, ret; + + memset(mmsgs, 0, sizeof(mmsgs)); + + left = cfg_payload_len; + while (left) { + if (i == max_nr_msg) + error(1, 0, "sendmmsg: exceeds max_nr_msg"); + + iov[i].iov_base = data + off; + iov[i].iov_len = cfg_mss < left ? cfg_mss : left; + + mmsgs[i].msg_hdr.msg_iov = iov + i; + mmsgs[i].msg_hdr.msg_iovlen = 1; + + off += iov[i].iov_len; + left -= iov[i].iov_len; + i++; + } + + ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmmsg"); + + return ret; +} + +static void send_udp_segment_cmsg(struct cmsghdr *cm) +{ + uint16_t *valp; + + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss)); + valp = (void *)CMSG_DATA(cm); + *valp = cfg_mss; +} + +static int send_udp_segment(int fd, char *data) +{ + char control[CMSG_SPACE(sizeof(cfg_mss))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + int ret; + + iov.iov_base = data; + iov.iov_len = cfg_payload_len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg)); + + msg.msg_name = (void *)&cfg_dst_addr; + msg.msg_namelen = cfg_alen; + + ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != iov.iov_len) + error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len); + + return 1; +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int max_len, hdrlen; + int c; + + while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'c': + cfg_cache_trash = true; + break; + case 'C': + cfg_cpu = strtol(optarg, NULL, 0); + break; + case 'D': + setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + break; + case 'l': + cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; + break; + case 'm': + cfg_sendmmsg = true; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'S': + cfg_segment = true; + break; + case 't': + cfg_tcp = true; + break; + case 'u': + cfg_connected = false; + break; + case 'z': + cfg_zerocopy = true; + break; + } + } + + if (optind != argc) + usage(argv[0]); + + if (cfg_family == PF_UNSPEC) + error(1, 0, "must pass one of -4 or -6"); + if (cfg_tcp && !cfg_connected) + error(1, 0, "connectionless tcp makes no sense"); + if (cfg_segment && cfg_sendmmsg) + error(1, 0, "cannot combine segment offload and sendmmsg"); + + if (cfg_family == PF_INET) + hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); + else + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr); + + cfg_mss = ETH_DATA_LEN - hdrlen; + max_len = ETH_MAX_MTU - hdrlen; + + if (cfg_payload_len > max_len) + error(1, 0, "payload length %u exceeds max %u", + cfg_payload_len, max_len); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +int main(int argc, char **argv) +{ + unsigned long num_msgs, num_sends; + unsigned long tnow, treport, tstop; + int fd, i, val; + + parse_opts(argc, argv); + + if (cfg_cpu > 0) + set_cpu(cfg_cpu); + + for (i = 0; i < sizeof(buf[0]); i++) + buf[0][i] = 'a' + (i % 26); + for (i = 1; i < NUM_PKT; i++) + memcpy(buf[i], buf[0], sizeof(buf[0])); + + signal(SIGINT, sigint_handler); + + fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + if (cfg_zerocopy) { + val = 1; + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) + error(1, errno, "setsockopt zerocopy"); + } + + if (cfg_connected && + connect(fd, (void *)&cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + + if (cfg_segment) + set_pmtu_discover(fd, cfg_family == PF_INET); + + num_msgs = num_sends = 0; + tnow = gettimeofday_ms(); + tstop = tnow + cfg_runtime_ms; + treport = tnow + 1000; + + i = 0; + do { + if (cfg_tcp) + num_sends += send_tcp(fd, buf[i]); + else if (cfg_segment) + num_sends += send_udp_segment(fd, buf[i]); + else if (cfg_sendmmsg) + num_sends += send_udp_sendmmsg(fd, buf[i]); + else + num_sends += send_udp(fd, buf[i]); + num_msgs++; + + if (cfg_zerocopy && ((num_msgs & 0xF) == 0)) + flush_zerocopy(fd); + + tnow = gettimeofday_ms(); + if (tnow > treport) { + fprintf(stderr, + "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n", + cfg_tcp ? "tcp" : "udp", + (num_msgs * cfg_payload_len) >> 20, + num_sends, num_msgs); + num_msgs = num_sends = 0; + treport = tnow + 1000; + } + + /* cold cache when writing buffer */ + if (cfg_cache_trash) + i = ++i < NUM_PKT ? i : 0; + + } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); + + if (close(fd)) + error(1, errno, "close"); + + return 0; +} diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index f6b1338730db..201b598558b9 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -17,7 +17,6 @@ SUB_DIRS = alignment \ benchmarks \ cache_shape \ copyloops \ - context_switch \ dscr \ mm \ pmu \ diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore index 1d980e3d7039..9d383073b7ad 100644 --- a/tools/testing/selftests/powerpc/alignment/.gitignore +++ b/tools/testing/selftests/powerpc/alignment/.gitignore @@ -3,3 +3,4 @@ copy_first_unaligned paste_unaligned paste_last_unaligned copy_paste_unaligned_common +alignment_handler diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c index 3c9c144192be..c14b0fc1edde 100644 --- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c +++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c @@ -6,8 +6,11 @@ * Copyright 2018, Anton Blanchard, IBM Corp. */ -void _exit(int); +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> + void _start(void) { - _exit(0); + syscall(SYS_exit, 0); } diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore b/tools/testing/selftests/powerpc/context_switch/.gitignore deleted file mode 100644 index c1431af7b51c..000000000000 --- a/tools/testing/selftests/powerpc/context_switch/.gitignore +++ /dev/null @@ -1 +0,0 @@ -cp_abort diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile deleted file mode 100644 index e9351bb4285d..000000000000 --- a/tools/testing/selftests/powerpc/context_switch/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -TEST_GEN_PROGS := cp_abort - -include ../../lib.mk - -$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c b/tools/testing/selftests/powerpc/context_switch/cp_abort.c deleted file mode 100644 index 5a5b55afda0e..000000000000 --- a/tools/testing/selftests/powerpc/context_switch/cp_abort.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Adapted from Anton Blanchard's context switch microbenchmark. - * - * Copyright 2009, Anton Blanchard, IBM Corporation. - * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * This program tests the copy paste abort functionality of a P9 - * (or later) by setting up two processes on the same CPU, one - * which executes the copy instruction and the other which - * executes paste. - * - * The paste instruction should never succeed, as the cp_abort - * instruction is called by the kernel during a context switch. - * - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include "utils.h" -#include <sched.h> - -#define READ_FD 0 -#define WRITE_FD 1 - -#define NUM_LOOPS 1000 - -/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */ -#define PASTE(RA, RB, L, RC) \ - .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31)) - -int paste(void *i) -{ - int cr; - - asm volatile(str(PASTE(0, %1, 1, 1))";" - "mfcr %0;" - : "=r" (cr) - : "b" (i) - : "memory" - ); - return cr; -} - -/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */ -#define COPY(RA, RB, L) \ - .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10)) - -void copy(void *i) -{ - asm volatile(str(COPY(0, %0, 1))";" - : - : "b" (i) - : "memory" - ); -} - -int test_cp_abort(void) -{ - /* 128 bytes for a full cache line */ - char buf[128] __cacheline_aligned; - cpu_set_t cpuset; - int fd1[2], fd2[2], pid; - char c; - - /* only run this test on a P9 or later */ - SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); - - /* - * Run both processes on the same CPU, so that copy is more likely - * to leak into a paste. - */ - CPU_ZERO(&cpuset); - CPU_SET(pick_online_cpu(), &cpuset); - FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset)); - - FAIL_IF(pipe(fd1) || pipe(fd2)); - - pid = fork(); - FAIL_IF(pid < 0); - - if (!pid) { - for (int i = 0; i < NUM_LOOPS; i++) { - FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1); - FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1); - /* A paste succeeds if CR0 EQ bit is set */ - FAIL_IF(paste(buf) & 0x20000000); - } - } else { - for (int i = 0; i < NUM_LOOPS; i++) { - FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1); - copy(buf); - FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1)); - } - } - return 0; - -} - -int main(int argc, char *argv[]) -{ - return test_harness(test_cp_abort, "cp_abort"); -} diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 4afdebcce4cd..7f348c059bc2 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -54,6 +54,7 @@ #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ #define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ +#define SPRN_AMR 13 /* Authority Mask Register - problem state */ /* TEXASR register bits */ #define TEXASR_FC 0xFE00000000000000 diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 349acfafc95b..07ec449a2767 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -8,3 +8,5 @@ ptrace-vsx ptrace-tm-vsx ptrace-tm-spd-vsx ptrace-tm-spr +ptrace-hwbreak +perf-hwbreak diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 480305266504..28f5b781a553 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr + ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ + perf-hwbreak include ../../lib.mk @@ -9,6 +10,9 @@ all: $(TEST_PROGS) CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie +ptrace-pkey core-pkey: child.h +ptrace-pkey core-pkey: LDLIBS += -pthread + $(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h clean: diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h new file mode 100644 index 000000000000..d7275b7b33dc --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/child.h @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Helper functions to sync execution between parent and child processes. + * + * Copyright 2018, Thiago Jung Bauermann, IBM Corporation. + */ +#include <stdio.h> +#include <stdbool.h> +#include <semaphore.h> + +/* + * Information in a shared memory location for synchronization between child and + * parent. + */ +struct child_sync { + /* The parent waits on this semaphore. */ + sem_t sem_parent; + + /* If true, the child should give up as well. */ + bool parent_gave_up; + + /* The child waits on this semaphore. */ + sem_t sem_child; + + /* If true, the parent should give up as well. */ + bool child_gave_up; +}; + +#define CHILD_FAIL_IF(x, sync) \ + do { \ + if (x) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d\n", __LINE__); \ + (sync)->child_gave_up = true; \ + prod_parent(sync); \ + return 1; \ + } \ + } while (0) + +#define PARENT_FAIL_IF(x, sync) \ + do { \ + if (x) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d\n", __LINE__); \ + (sync)->parent_gave_up = true; \ + prod_child(sync); \ + return 1; \ + } \ + } while (0) + +#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \ + do { \ + if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \ + (sync)->parent_gave_up = true; \ + prod_child(sync); \ + SKIP_IF(1); \ + } \ + } while (0) + +int init_child_sync(struct child_sync *sync) +{ + int ret; + + ret = sem_init(&sync->sem_parent, 1, 0); + if (ret) { + perror("Semaphore initialization failed"); + return 1; + } + + ret = sem_init(&sync->sem_child, 1, 0); + if (ret) { + perror("Semaphore initialization failed"); + return 1; + } + + return 0; +} + +void destroy_child_sync(struct child_sync *sync) +{ + sem_destroy(&sync->sem_parent); + sem_destroy(&sync->sem_child); +} + +int wait_child(struct child_sync *sync) +{ + int ret; + + /* Wait until the child prods us. */ + ret = sem_wait(&sync->sem_parent); + if (ret) { + perror("Error waiting for child"); + return 1; + } + + return sync->child_gave_up; +} + +int prod_child(struct child_sync *sync) +{ + int ret; + + /* Unblock the child now. */ + ret = sem_post(&sync->sem_child); + if (ret) { + perror("Error prodding child"); + return 1; + } + + return 0; +} + +int wait_parent(struct child_sync *sync) +{ + int ret; + + /* Wait until the parent prods us. */ + ret = sem_wait(&sync->sem_child); + if (ret) { + perror("Error waiting for parent"); + return 1; + } + + return sync->parent_gave_up; +} + +int prod_parent(struct child_sync *sync) +{ + int ret; + + /* Unblock the parent now. */ + ret = sem_post(&sync->sem_parent); + if (ret) { + perror("Error prodding parent"); + return 1; + } + + return 0; +} diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c new file mode 100644 index 000000000000..36bc312b1f5c --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Ptrace test for Memory Protection Key registers + * + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. + * Copyright (C) 2018 IBM Corporation. + */ +#include <limits.h> +#include <linux/kernel.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <unistd.h> +#include "ptrace.h" +#include "child.h" + +#ifndef __NR_pkey_alloc +#define __NR_pkey_alloc 384 +#endif + +#ifndef __NR_pkey_free +#define __NR_pkey_free 385 +#endif + +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + +#ifndef PKEY_DISABLE_EXECUTE +#define PKEY_DISABLE_EXECUTE 0x4 +#endif + +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + +#define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */ + +static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern"; + +static const char user_write[] = "[User Write (Running)]"; +static const char core_read_running[] = "[Core Read (Running)]"; + +/* Information shared between the parent and the child. */ +struct shared_info { + struct child_sync child_sync; + + /* AMR value the parent expects to read in the core file. */ + unsigned long amr; + + /* IAMR value the parent expects to read in the core file. */ + unsigned long iamr; + + /* UAMOR value the parent expects to read in the core file. */ + unsigned long uamor; + + /* When the child crashed. */ + time_t core_time; +}; + +static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) +{ + return syscall(__NR_pkey_alloc, flags, init_access_rights); +} + +static int sys_pkey_free(int pkey) +{ + return syscall(__NR_pkey_free, pkey); +} + +static int increase_core_file_limit(void) +{ + struct rlimit rlim; + int ret; + + ret = getrlimit(RLIMIT_CORE, &rlim); + FAIL_IF(ret); + + if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) { + rlim.rlim_cur = CORE_FILE_LIMIT; + + if (rlim.rlim_max != RLIM_INFINITY && + rlim.rlim_max < CORE_FILE_LIMIT) + rlim.rlim_max = CORE_FILE_LIMIT; + + ret = setrlimit(RLIMIT_CORE, &rlim); + FAIL_IF(ret); + } + + ret = getrlimit(RLIMIT_FSIZE, &rlim); + FAIL_IF(ret); + + if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) { + rlim.rlim_cur = CORE_FILE_LIMIT; + + if (rlim.rlim_max != RLIM_INFINITY && + rlim.rlim_max < CORE_FILE_LIMIT) + rlim.rlim_max = CORE_FILE_LIMIT; + + ret = setrlimit(RLIMIT_FSIZE, &rlim); + FAIL_IF(ret); + } + + return TEST_PASS; +} + +static int child(struct shared_info *info) +{ + bool disable_execute = true; + int pkey1, pkey2, pkey3; + int *ptr, ret; + + /* Wait until parent fills out the initial register values. */ + ret = wait_parent(&info->child_sync); + if (ret) + return ret; + + ret = increase_core_file_limit(); + FAIL_IF(ret); + + /* Get some pkeys so that we can change their bits in the AMR. */ + pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); + if (pkey1 < 0) { + pkey1 = sys_pkey_alloc(0, 0); + FAIL_IF(pkey1 < 0); + + disable_execute = false; + } + + pkey2 = sys_pkey_alloc(0, 0); + FAIL_IF(pkey2 < 0); + + pkey3 = sys_pkey_alloc(0, 0); + FAIL_IF(pkey3 < 0); + + info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2); + + if (disable_execute) + info->iamr |= 1ul << pkeyshift(pkey1); + + info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2); + + printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", + user_write, info->amr, pkey1, pkey2, pkey3); + + mtspr(SPRN_AMR, info->amr); + + /* + * We won't use pkey3. This tests whether the kernel restores the UAMOR + * permissions after a key is freed. + */ + sys_pkey_free(pkey3); + + info->core_time = time(NULL); + + /* Crash. */ + ptr = 0; + *ptr = 1; + + /* Shouldn't get here. */ + FAIL_IF(true); + + return TEST_FAIL; +} + +/* Return file size if filename exists and pass sanity check, or zero if not. */ +static off_t try_core_file(const char *filename, struct shared_info *info, + pid_t pid) +{ + struct stat buf; + int ret; + + ret = stat(filename, &buf); + if (ret == -1) + return TEST_FAIL; + + /* Make sure we're not using a stale core file. */ + return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL; +} + +static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr) +{ + return (void *) nhdr + sizeof(*nhdr) + + __ALIGN_KERNEL(nhdr->n_namesz, 4) + + __ALIGN_KERNEL(nhdr->n_descsz, 4); +} + +static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr, + off_t core_size) +{ + unsigned long *regs; + Elf64_Phdr *phdr; + Elf64_Nhdr *nhdr; + size_t phdr_size; + void *p = ehdr, *note; + int ret; + + ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG); + FAIL_IF(ret); + + FAIL_IF(ehdr->e_type != ET_CORE); + FAIL_IF(ehdr->e_machine != EM_PPC64); + FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0); + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(*phdr) * ehdr->e_phnum; + + /* Sanity check the program header table location. */ + FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff); + FAIL_IF(ehdr->e_phoff + phdr_size > core_size); + + /* Find the PT_NOTE segment. */ + for (phdr = p + ehdr->e_phoff; + (void *) phdr < p + ehdr->e_phoff + phdr_size; + phdr += ehdr->e_phentsize) + if (phdr->p_type == PT_NOTE) + break; + + FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size); + + /* Find the NT_PPC_PKEY note. */ + for (nhdr = p + phdr->p_offset; + (void *) nhdr < p + phdr->p_offset + phdr->p_filesz; + nhdr = next_note(nhdr)) + if (nhdr->n_type == NT_PPC_PKEY) + break; + + FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz); + FAIL_IF(nhdr->n_descsz == 0); + + p = nhdr; + note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4); + + regs = (unsigned long *) note; + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + core_read_running, regs[0], regs[1], regs[2]); + + FAIL_IF(regs[0] != info->amr); + FAIL_IF(regs[1] != info->iamr); + FAIL_IF(regs[2] != info->uamor); + + return TEST_PASS; +} + +static int parent(struct shared_info *info, pid_t pid) +{ + char *filenames, *filename[3]; + int fd, i, ret, status; + unsigned long regs[3]; + off_t core_size; + void *core; + + /* + * Get the initial values for AMR, IAMR and UAMOR and communicate them + * to the child. + */ + ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_FAIL_IF(ret, &info->child_sync); + + info->amr = regs[0]; + info->iamr = regs[1]; + info->uamor = regs[2]; + + /* Wake up child so that it can set itself up. */ + ret = prod_child(&info->child_sync); + PARENT_FAIL_IF(ret, &info->child_sync); + + ret = wait(&status); + if (ret != pid) { + printf("Child's exit status not captured\n"); + return TEST_FAIL; + } else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) { + printf("Child didn't dump core\n"); + return TEST_FAIL; + } + + /* Construct array of core file names to try. */ + + filename[0] = filenames = malloc(PATH_MAX); + if (!filenames) { + perror("Error allocating memory"); + return TEST_FAIL; + } + + ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid); + if (ret < 0 || ret >= PATH_MAX) { + ret = TEST_FAIL; + goto out; + } + + filename[1] = filename[0] + ret + 1; + ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid); + if (ret < 0 || ret >= PATH_MAX - ret - 1) { + ret = TEST_FAIL; + goto out; + } + filename[2] = "core"; + + for (i = 0; i < 3; i++) { + core_size = try_core_file(filename[i], info, pid); + if (core_size != TEST_FAIL) + break; + } + + if (i == 3) { + printf("Couldn't find core file\n"); + ret = TEST_FAIL; + goto out; + } + + fd = open(filename[i], O_RDONLY); + if (fd == -1) { + perror("Error opening core file"); + ret = TEST_FAIL; + goto out; + } + + core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (core == (void *) -1) { + perror("Error mmaping core file"); + ret = TEST_FAIL; + goto out; + } + + ret = check_core_file(info, core, core_size); + + munmap(core, core_size); + close(fd); + unlink(filename[i]); + + out: + free(filenames); + + return ret; +} + +static int write_core_pattern(const char *core_pattern) +{ + size_t len = strlen(core_pattern), ret; + FILE *f; + + f = fopen(core_pattern_file, "w"); + if (!f) { + perror("Error writing to core_pattern file"); + return TEST_FAIL; + } + + ret = fwrite(core_pattern, 1, len, f); + fclose(f); + if (ret != len) { + perror("Error writing to core_pattern file"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +static int setup_core_pattern(char **core_pattern_, bool *changed_) +{ + FILE *f; + char *core_pattern; + int ret; + + core_pattern = malloc(PATH_MAX); + if (!core_pattern) { + perror("Error allocating memory"); + return TEST_FAIL; + } + + f = fopen(core_pattern_file, "r"); + if (!f) { + perror("Error opening core_pattern file"); + ret = TEST_FAIL; + goto out; + } + + ret = fread(core_pattern, 1, PATH_MAX, f); + fclose(f); + if (!ret) { + perror("Error reading core_pattern file"); + ret = TEST_FAIL; + goto out; + } + + /* Check whether we can predict the name of the core file. */ + if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p")) + *changed_ = false; + else { + ret = write_core_pattern("core-pkey.%p"); + if (ret) + goto out; + + *changed_ = true; + } + + *core_pattern_ = core_pattern; + ret = TEST_PASS; + + out: + if (ret) + free(core_pattern); + + return ret; +} + +static int core_pkey(void) +{ + char *core_pattern; + bool changed_core_pattern; + struct shared_info *info; + int shm_id; + int ret; + pid_t pid; + + ret = setup_core_pattern(&core_pattern, &changed_core_pattern); + if (ret) + return ret; + + shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT); + info = shmat(shm_id, NULL, 0); + + ret = init_child_sync(&info->child_sync); + if (ret) + return ret; + + pid = fork(); + if (pid < 0) { + perror("fork() failed"); + ret = TEST_FAIL; + } else if (pid == 0) + ret = child(info); + else + ret = parent(info, pid); + + shmdt(info); + + if (pid) { + destroy_child_sync(&info->child_sync); + shmctl(shm_id, IPC_RMID, NULL); + + if (changed_core_pattern) + write_core_pattern(core_pattern); + } + + free(core_pattern); + + return ret; +} + +int main(int argc, char *argv[]) +{ + return test_harness(core_pkey, "core_pkey"); +} diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c new file mode 100644 index 000000000000..60df0b5e628a --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -0,0 +1,195 @@ +/* + * perf events self profiling example test case for hw breakpoints. + * + * This tests perf PERF_TYPE_BREAKPOINT parameters + * 1) tests all variants of the break on read/write flags + * 2) tests exclude_user == 0 and 1 + * 3) test array matches (if DAWR is supported)) + * 4) test different numbers of breakpoints matches + * + * Configure this breakpoint, then read and write the data a number of + * times. Then check the output count from perf is as expected. + * + * Based on: + * http://ozlabs.org/~anton/junkcode/perf_events_example1.c + * + * Copyright (C) 2018 Michael Neuling, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <unistd.h> +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <elf.h> +#include <pthread.h> +#include <sys/syscall.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include "utils.h" + +#define MAX_LOOPS 10000 + +#define DAWR_LENGTH_MAX ((0x3f + 1) * 8) + +static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, + int cpu, int group_fd, + unsigned long flags) +{ + attr->size = sizeof(*attr); + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +static inline bool breakpoint_test(int len) +{ + struct perf_event_attr attr; + int fd; + + /* setup counters */ + memset(&attr, 0, sizeof(attr)); + attr.disabled = 1; + attr.type = PERF_TYPE_BREAKPOINT; + attr.bp_type = HW_BREAKPOINT_R; + /* bp_addr can point anywhere but needs to be aligned */ + attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800; + attr.bp_len = len; + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd < 0) + return false; + close(fd); + return true; +} + +static inline bool perf_breakpoint_supported(void) +{ + return breakpoint_test(4); +} + +static inline bool dawr_supported(void) +{ + return breakpoint_test(DAWR_LENGTH_MAX); +} + +static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) +{ + int i,j; + struct perf_event_attr attr; + size_t res; + unsigned long long breaks, needed; + int readint; + int readintarraybig[2*DAWR_LENGTH_MAX/sizeof(int)]; + int *readintalign; + volatile int *ptr; + int break_fd; + int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */ + volatile int *k; + + /* align to 0x400 boundary as required by DAWR */ + readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) & + 0xfffffffffffff800); + + ptr = &readint; + if (arraytest) + ptr = &readintalign[0]; + + /* setup counters */ + memset(&attr, 0, sizeof(attr)); + attr.disabled = 1; + attr.type = PERF_TYPE_BREAKPOINT; + attr.bp_type = readwriteflag; + attr.bp_addr = (__u64)ptr; + attr.bp_len = sizeof(int); + if (arraytest) + attr.bp_len = DAWR_LENGTH_MAX; + attr.exclude_user = exclude_user; + break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (break_fd < 0) { + perror("sys_perf_event_open"); + exit(1); + } + + /* start counters */ + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + + /* Test a bunch of reads and writes */ + k = &readint; + for (i = 0; i < loop_num; i++) { + if (arraytest) + k = &(readintalign[i % (DAWR_LENGTH_MAX/sizeof(int))]); + + j = *k; + *k = j; + } + + /* stop counters */ + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + + /* read and check counters */ + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + /* we read and write each loop, so subtract the ones we are counting */ + needed = 0; + if (readwriteflag & HW_BREAKPOINT_R) + needed += loop_num; + if (readwriteflag & HW_BREAKPOINT_W) + needed += loop_num; + needed = needed * (1 - exclude_user); + printf("TESTED: addr:0x%lx brks:% 8lld loops:% 8i rw:%i !user:%i array:%i\n", + (unsigned long int)ptr, breaks, loop_num, readwriteflag, exclude_user, arraytest); + if (breaks != needed) { + printf("FAILED: 0x%lx brks:%lld needed:%lli %i %i %i\n\n", + (unsigned long int)ptr, breaks, needed, loop_num, readwriteflag, exclude_user); + return 1; + } + close(break_fd); + + return 0; +} + +static int runtest(void) +{ + int rwflag; + int exclude_user; + int ret; + + /* + * perf defines rwflag as two bits read and write and at least + * one must be set. So range 1-3. + */ + for (rwflag = 1 ; rwflag < 4; rwflag++) { + for (exclude_user = 0 ; exclude_user < 2; exclude_user++) { + ret = runtestsingle(rwflag, exclude_user, 0); + if (ret) + return ret; + + /* if we have the dawr, we can do an array test */ + if (!dawr_supported()) + continue; + ret = runtestsingle(rwflag, exclude_user, 1); + if (ret) + return ret; + } + } + return 0; +} + + +static int perf_hwbreak(void) +{ + srand ( time(NULL) ); + + SKIP_IF(!perf_breakpoint_supported()); + + return runtest(); +} + +int main(int argc, char *argv[], char **envp) +{ + return test_harness(perf_hwbreak, "perf_hwbreak"); +} diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c new file mode 100644 index 000000000000..3066d310f32b --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Ptrace test for hw breakpoints + * + * Based on tools/testing/selftests/breakpoints/breakpoint_test.c + * + * This test forks and the parent then traces the child doing various + * types of ptrace enabled breakpoints + * + * Copyright (C) 2018 Michael Neuling, IBM Corporation. + */ + +#include <sys/ptrace.h> +#include <unistd.h> +#include <stddef.h> +#include <sys/user.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "ptrace.h" + +/* Breakpoint access modes */ +enum { + BP_X = 1, + BP_RW = 2, + BP_W = 4, +}; + +static pid_t child_pid; +static struct ppc_debug_info dbginfo; + +static void get_dbginfo(void) +{ + int ret; + + ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); + if (ret) { + perror("Can't get breakpoint info\n"); + exit(-1); + } +} + +static bool hwbreak_present(void) +{ + return (dbginfo.num_data_bps != 0); +} + +static bool dawr_present(void) +{ + return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR); +} + +static void set_breakpoint_addr(void *addr) +{ + int ret; + + ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr); + if (ret) { + perror("Can't set breakpoint addr\n"); + exit(-1); + } +} + +static int set_hwbreakpoint_addr(void *addr, int range) +{ + int ret; + + struct ppc_hw_breakpoint info; + + info.version = 1; + info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW; + info.addr_mode = PPC_BREAKPOINT_MODE_EXACT; + if (range > 0) + info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info.addr = (__u64)addr; + info.addr2 = (__u64)addr + range; + info.condition_value = 0; + + ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); + if (ret < 0) { + perror("Can't set breakpoint\n"); + exit(-1); + } + return ret; +} + +static int del_hwbreakpoint_addr(int watchpoint_handle) +{ + int ret; + + ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle); + if (ret < 0) { + perror("Can't delete hw breakpoint\n"); + exit(-1); + } + return ret; +} + +#define DAWR_LENGTH_MAX 512 + +/* Dummy variables to test read/write accesses */ +static unsigned long long + dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)] + __attribute__((aligned(512))); +static unsigned long long *dummy_var = dummy_array; + +static void write_var(int len) +{ + long long *plval; + char *pcval; + short *psval; + int *pival; + + switch (len) { + case 1: + pcval = (char *)dummy_var; + *pcval = 0xff; + break; + case 2: + psval = (short *)dummy_var; + *psval = 0xffff; + break; + case 4: + pival = (int *)dummy_var; + *pival = 0xffffffff; + break; + case 8: + plval = (long long *)dummy_var; + *plval = 0xffffffffffffffffLL; + break; + } +} + +static void read_var(int len) +{ + char cval __attribute__((unused)); + short sval __attribute__((unused)); + int ival __attribute__((unused)); + long long lval __attribute__((unused)); + + switch (len) { + case 1: + cval = *(char *)dummy_var; + break; + case 2: + sval = *(short *)dummy_var; + break; + case 4: + ival = *(int *)dummy_var; + break; + case 8: + lval = *(long long *)dummy_var; + break; + } +} + +/* + * Do the r/w accesses to trigger the breakpoints. And run + * the usual traps. + */ +static void trigger_tests(void) +{ + int len, ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("Can't be traced?\n"); + return; + } + + /* Wake up father so that it sets up the first test */ + kill(getpid(), SIGUSR1); + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) + write_var(len); + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) + read_var(len); + + /* Test when breakpoint is unset */ + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) + write_var(len); + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) + read_var(len); +} + +static void check_success(const char *msg) +{ + const char *msg2; + int status; + + /* Wait for the child to SIGTRAP */ + wait(&status); + + msg2 = "Failed"; + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { + msg2 = "Child process hit the breakpoint"; + } + + printf("%s Result: [%s]\n", msg, msg2); +} + +static void launch_watchpoints(char *buf, int mode, int len, + struct ppc_debug_info *dbginfo, bool dawr) +{ + const char *mode_str; + unsigned long data = (unsigned long)(dummy_var); + int wh, range; + + data &= ~0x7UL; + + if (mode == BP_W) { + data |= (1UL << 1); + mode_str = "write"; + } else { + data |= (1UL << 0); + data |= (1UL << 1); + mode_str = "read"; + } + + /* Set DABR_TRANSLATION bit */ + data |= (1UL << 2); + + /* use PTRACE_SET_DEBUGREG breakpoints */ + set_breakpoint_addr((void *)data); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + set_breakpoint_addr(NULL); + + data = (data & ~7); /* remove dabr control bits */ + + /* use PPC_PTRACE_SETHWDEBUG breakpoint */ + if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) + return; /* not supported */ + wh = set_hwbreakpoint_addr((void *)data, 0); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + del_hwbreakpoint_addr(wh); + + /* try a wider range */ + range = 8; + if (dawr) + range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1)); + wh = set_hwbreakpoint_addr((void *)data, range); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + del_hwbreakpoint_addr(wh); +} + +/* Set the breakpoints and check the child successfully trigger them */ +static int launch_tests(bool dawr) +{ + char buf[1024]; + int len, i, status; + + struct ppc_debug_info dbginfo; + + i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); + if (i) { + perror("Can't set breakpoint info\n"); + exit(-1); + } + if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) + printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n"); + + /* Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) + launch_watchpoints(buf, BP_W, len, &dbginfo, dawr); + + /* Read-Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) + launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + + /* + * Now we have unregistered the breakpoint, access by child + * should not cause SIGTRAP. + */ + + wait(&status); + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { + printf("FAIL: Child process hit the breakpoint, which is not expected\n"); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + return TEST_FAIL; + } + + if (WIFEXITED(status)) + printf("Child exited normally\n"); + + return TEST_PASS; +} + +static int ptrace_hwbreak(void) +{ + pid_t pid; + int ret; + bool dawr; + + pid = fork(); + if (!pid) { + trigger_tests(); + return 0; + } + + wait(NULL); + + child_pid = pid; + + get_dbginfo(); + SKIP_IF(!hwbreak_present()); + dawr = dawr_present(); + + ret = launch_tests(dawr); + + wait(NULL); + + return ret; +} + +int main(int argc, char **argv, char **envp) +{ + return test_harness(ptrace_hwbreak, "ptrace-hwbreak"); +} diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c new file mode 100644 index 000000000000..5cf631f792cc --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Ptrace test for Memory Protection Key registers + * + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. + * Copyright (C) 2018 IBM Corporation. + */ +#include "ptrace.h" +#include "child.h" + +#ifndef __NR_pkey_alloc +#define __NR_pkey_alloc 384 +#endif + +#ifndef __NR_pkey_free +#define __NR_pkey_free 385 +#endif + +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + +#ifndef PKEY_DISABLE_EXECUTE +#define PKEY_DISABLE_EXECUTE 0x4 +#endif + +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + +static const char user_read[] = "[User Read (Running)]"; +static const char user_write[] = "[User Write (Running)]"; +static const char ptrace_read_running[] = "[Ptrace Read (Running)]"; +static const char ptrace_write_running[] = "[Ptrace Write (Running)]"; + +/* Information shared between the parent and the child. */ +struct shared_info { + struct child_sync child_sync; + + /* AMR value the parent expects to read from the child. */ + unsigned long amr1; + + /* AMR value the parent is expected to write to the child. */ + unsigned long amr2; + + /* AMR value that ptrace should refuse to write to the child. */ + unsigned long amr3; + + /* IAMR value the parent expects to read from the child. */ + unsigned long expected_iamr; + + /* UAMOR value the parent expects to read from the child. */ + unsigned long expected_uamor; + + /* + * IAMR and UAMOR values that ptrace should refuse to write to the child + * (even though they're valid ones) because userspace doesn't have + * access to those registers. + */ + unsigned long new_iamr; + unsigned long new_uamor; +}; + +static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) +{ + return syscall(__NR_pkey_alloc, flags, init_access_rights); +} + +static int sys_pkey_free(int pkey) +{ + return syscall(__NR_pkey_free, pkey); +} + +static int child(struct shared_info *info) +{ + unsigned long reg; + bool disable_execute = true; + int pkey1, pkey2, pkey3; + int ret; + + /* Wait until parent fills out the initial register values. */ + ret = wait_parent(&info->child_sync); + if (ret) + return ret; + + /* Get some pkeys so that we can change their bits in the AMR. */ + pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); + if (pkey1 < 0) { + pkey1 = sys_pkey_alloc(0, 0); + CHILD_FAIL_IF(pkey1 < 0, &info->child_sync); + + disable_execute = false; + } + + pkey2 = sys_pkey_alloc(0, 0); + CHILD_FAIL_IF(pkey2 < 0, &info->child_sync); + + pkey3 = sys_pkey_alloc(0, 0); + CHILD_FAIL_IF(pkey3 < 0, &info->child_sync); + + info->amr1 |= 3ul << pkeyshift(pkey1); + info->amr2 |= 3ul << pkeyshift(pkey2); + info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3); + + if (disable_execute) + info->expected_iamr |= 1ul << pkeyshift(pkey1); + + info->expected_uamor |= 3ul << pkeyshift(pkey1) | + 3ul << pkeyshift(pkey2); + info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); + info->new_uamor |= 3ul << pkeyshift(pkey1); + + /* + * We won't use pkey3. We just want a plausible but invalid key to test + * whether ptrace will let us write to AMR bits we are not supposed to. + * + * This also tests whether the kernel restores the UAMOR permissions + * after a key is freed. + */ + sys_pkey_free(pkey3); + + printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", + user_write, info->amr1, pkey1, pkey2, pkey3); + + mtspr(SPRN_AMR, info->amr1); + + /* Wait for parent to read our AMR value and write a new one. */ + ret = prod_parent(&info->child_sync); + CHILD_FAIL_IF(ret, &info->child_sync); + + ret = wait_parent(&info->child_sync); + if (ret) + return ret; + + reg = mfspr(SPRN_AMR); + + printf("%-30s AMR: %016lx\n", user_read, reg); + + CHILD_FAIL_IF(reg != info->amr2, &info->child_sync); + + /* + * Wait for parent to try to write an invalid AMR value. + */ + ret = prod_parent(&info->child_sync); + CHILD_FAIL_IF(ret, &info->child_sync); + + ret = wait_parent(&info->child_sync); + if (ret) + return ret; + + reg = mfspr(SPRN_AMR); + + printf("%-30s AMR: %016lx\n", user_read, reg); + + CHILD_FAIL_IF(reg != info->amr2, &info->child_sync); + + /* + * Wait for parent to try to write an IAMR and a UAMOR value. We can't + * verify them, but we can verify that the AMR didn't change. + */ + ret = prod_parent(&info->child_sync); + CHILD_FAIL_IF(ret, &info->child_sync); + + ret = wait_parent(&info->child_sync); + if (ret) + return ret; + + reg = mfspr(SPRN_AMR); + + printf("%-30s AMR: %016lx\n", user_read, reg); + + CHILD_FAIL_IF(reg != info->amr2, &info->child_sync); + + /* Now let parent now that we are finished. */ + + ret = prod_parent(&info->child_sync); + CHILD_FAIL_IF(ret, &info->child_sync); + + return TEST_PASS; +} + +static int parent(struct shared_info *info, pid_t pid) +{ + unsigned long regs[3]; + int ret, status; + + /* + * Get the initial values for AMR, IAMR and UAMOR and communicate them + * to the child. + */ + ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_FAIL_IF(ret, &info->child_sync); + + info->amr1 = info->amr2 = info->amr3 = regs[0]; + info->expected_iamr = info->new_iamr = regs[1]; + info->expected_uamor = info->new_uamor = regs[2]; + + /* Wake up child so that it can set itself up. */ + ret = prod_child(&info->child_sync); + PARENT_FAIL_IF(ret, &info->child_sync); + + ret = wait_child(&info->child_sync); + if (ret) + return ret; + + /* Verify that we can read the pkey registers from the child. */ + ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); + PARENT_FAIL_IF(ret, &info->child_sync); + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + ptrace_read_running, regs[0], regs[1], regs[2]); + + PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync); + PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync); + PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync); + + /* Write valid AMR value in child. */ + ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1); + PARENT_FAIL_IF(ret, &info->child_sync); + + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2); + + /* Wake up child so that it can verify it changed. */ + ret = prod_child(&info->child_sync); + PARENT_FAIL_IF(ret, &info->child_sync); + + ret = wait_child(&info->child_sync); + if (ret) + return ret; + + /* Write invalid AMR value in child. */ + ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1); + PARENT_FAIL_IF(ret, &info->child_sync); + + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3); + + /* Wake up child so that it can verify it didn't change. */ + ret = prod_child(&info->child_sync); + PARENT_FAIL_IF(ret, &info->child_sync); + + ret = wait_child(&info->child_sync); + if (ret) + return ret; + + /* Try to write to IAMR. */ + regs[0] = info->amr1; + regs[1] = info->new_iamr; + ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2); + PARENT_FAIL_IF(!ret, &info->child_sync); + + printf("%-30s AMR: %016lx IAMR: %016lx\n", + ptrace_write_running, regs[0], regs[1]); + + /* Try to write to IAMR and UAMOR. */ + regs[2] = info->new_uamor; + ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3); + PARENT_FAIL_IF(!ret, &info->child_sync); + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + ptrace_write_running, regs[0], regs[1], regs[2]); + + /* Verify that all registers still have their expected values. */ + ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); + PARENT_FAIL_IF(ret, &info->child_sync); + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + ptrace_read_running, regs[0], regs[1], regs[2]); + + PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync); + PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync); + PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync); + + /* Wake up child so that it can verify AMR didn't change and wrap up. */ + ret = prod_child(&info->child_sync); + PARENT_FAIL_IF(ret, &info->child_sync); + + ret = wait(&status); + if (ret != pid) { + printf("Child's exit status not captured\n"); + ret = TEST_PASS; + } else if (!WIFEXITED(status)) { + printf("Child exited abnormally\n"); + ret = TEST_FAIL; + } else + ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS; + + return ret; +} + +static int ptrace_pkey(void) +{ + struct shared_info *info; + int shm_id; + int ret; + pid_t pid; + + shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT); + info = shmat(shm_id, NULL, 0); + + ret = init_child_sync(&info->child_sync); + if (ret) + return ret; + + pid = fork(); + if (pid < 0) { + perror("fork() failed"); + ret = TEST_FAIL; + } else if (pid == 0) + ret = child(info); + else + ret = parent(info, pid); + + shmdt(info); + + if (pid) { + destroy_child_sync(&info->child_sync); + shmctl(shm_id, IPC_RMID, NULL); + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + return test_harness(ptrace_pkey, "ptrace_pkey"); +} diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 19fb825270a1..34201cfa8335 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -102,6 +102,44 @@ int cont_trace(pid_t child) return TEST_PASS; } +int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[], + int n) +{ + struct iovec iov; + long ret; + + FAIL_IF(start_trace(child)); + + iov.iov_base = regs; + iov.iov_len = n * sizeof(unsigned long); + + ret = ptrace(PTRACE_GETREGSET, child, type, &iov); + if (ret) + return ret; + + FAIL_IF(stop_trace(child)); + + return TEST_PASS; +} + +long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[], + int n) +{ + struct iovec iov; + long ret; + + FAIL_IF(start_trace(child)); + + iov.iov_base = regs; + iov.iov_len = n * sizeof(unsigned long); + + ret = ptrace(PTRACE_SETREGSET, child, type, &iov); + + FAIL_IF(stop_trace(child)); + + return ret; +} + /* TAR, PPR, DSCR */ int show_tar_registers(pid_t child, unsigned long *out) { diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index bb90d4b79524..c3ee8393dae8 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -14,3 +14,4 @@ tm-signal-context-chk-vsx tm-vmx-unavail tm-unavailable tm-trap +tm-sigreturn diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 6c16f77c722c..74e5912e9f2e 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -1,3 +1,6 @@ +/fd-001-lookup +/fd-002-posix-eq +/fd-003-kthread /proc-loadavg-001 /proc-self-map-files-001 /proc-self-map-files-002 diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index dbb87e56264c..db310eedc268 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -1,6 +1,9 @@ -CFLAGS += -Wall -O2 +CFLAGS += -Wall -O2 -Wno-unused-function TEST_GEN_PROGS := +TEST_GEN_PROGS += fd-001-lookup +TEST_GEN_PROGS += fd-002-posix-eq +TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-loadavg-001 TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c new file mode 100644 index 000000000000..a2010dfb2110 --- /dev/null +++ b/tools/testing/selftests/proc/fd-001-lookup.c @@ -0,0 +1,168 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test /proc/*/fd lookup. +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <dirent.h> +#include <errno.h> +#include <limits.h> +#include <sched.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "proc.h" + +/* lstat(2) has more "coverage" in case non-symlink pops up somehow. */ +static void test_lookup_pass(const char *pathname) +{ + struct stat st; + ssize_t rv; + + memset(&st, 0, sizeof(struct stat)); + rv = lstat(pathname, &st); + assert(rv == 0); + assert(S_ISLNK(st.st_mode)); +} + +static void test_lookup_fail(const char *pathname) +{ + struct stat st; + ssize_t rv; + + rv = lstat(pathname, &st); + assert(rv == -1 && errno == ENOENT); +} + +static void test_lookup(unsigned int fd) +{ + char buf[64]; + unsigned int c; + unsigned int u; + int i; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); + test_lookup_pass(buf); + + /* leading junk */ + for (c = 1; c <= 255; c++) { + if (c == '/') + continue; + snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd); + test_lookup_fail(buf); + } + + /* trailing junk */ + for (c = 1; c <= 255; c++) { + if (c == '/') + continue; + snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c); + test_lookup_fail(buf); + } + + for (i = INT_MIN; i < INT_MIN + 1024; i++) { + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i); + test_lookup_fail(buf); + } + for (i = -1024; i < 0; i++) { + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i); + test_lookup_fail(buf); + } + for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) { + snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u); + test_lookup_fail(buf); + } + for (u = UINT_MAX - 1024; u != 0; u++) { + snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u); + test_lookup_fail(buf); + } + + +} + +int main(void) +{ + struct dirent *de; + unsigned int fd, target_fd; + + if (unshare(CLONE_FILES) == -1) + return 1; + + /* Wipe fdtable. */ + do { + DIR *d; + + d = opendir("/proc/self/fd"); + if (!d) + return 1; + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, ".")); + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, "..")); +next: + de = xreaddir(d); + if (de) { + unsigned long long fd_ull; + unsigned int fd; + char *end; + + assert(de->d_type == DT_LNK); + + fd_ull = xstrtoull(de->d_name, &end); + assert(*end == '\0'); + assert(fd_ull == (unsigned int)fd_ull); + + fd = fd_ull; + if (fd == dirfd(d)) + goto next; + close(fd); + } + + closedir(d); + } while (de); + + /* Now fdtable is clean. */ + + fd = open("/", O_PATH|O_DIRECTORY); + assert(fd == 0); + test_lookup(fd); + close(fd); + + /* Clean again! */ + + fd = open("/", O_PATH|O_DIRECTORY); + assert(fd == 0); + /* Default RLIMIT_NOFILE-1 */ + target_fd = 1023; + while (target_fd > 0) { + if (dup2(fd, target_fd) == target_fd) + break; + target_fd /= 2; + } + assert(target_fd > 0); + close(fd); + test_lookup(target_fd); + close(target_fd); + + return 0; +} diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c new file mode 100644 index 000000000000..417322ca9c53 --- /dev/null +++ b/tools/testing/selftests/proc/fd-002-posix-eq.c @@ -0,0 +1,57 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that open(/proc/*/fd/*) opens the same file. +#undef NDEBUG +#include <assert.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +int main(void) +{ + int fd0, fd1, fd2; + struct stat st0, st1, st2; + char buf[64]; + int rv; + + fd0 = open("/", O_DIRECTORY|O_RDONLY); + assert(fd0 >= 0); + + snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0); + fd1 = open(buf, O_RDONLY); + assert(fd1 >= 0); + + snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0); + fd2 = open(buf, O_RDONLY); + assert(fd2 >= 0); + + rv = fstat(fd0, &st0); + assert(rv == 0); + rv = fstat(fd1, &st1); + assert(rv == 0); + rv = fstat(fd2, &st2); + assert(rv == 0); + + assert(st0.st_dev == st1.st_dev); + assert(st0.st_ino == st1.st_ino); + + assert(st0.st_dev == st2.st_dev); + assert(st0.st_ino == st2.st_ino); + + return 0; +} diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c new file mode 100644 index 000000000000..1d659d55368c --- /dev/null +++ b/tools/testing/selftests/proc/fd-003-kthread.c @@ -0,0 +1,178 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that /proc/$KERNEL_THREAD/fd/ is empty. +#define _GNU_SOURCE +#undef NDEBUG +#include <sys/syscall.h> +#include <assert.h> +#include <dirent.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "proc.h" + +#define PF_KHTREAD 0x00200000 + +/* + * Test for kernel threadness atomically with openat(). + * + * Return /proc/$PID/fd descriptor if process is kernel thread. + * Return -1 if a process is userspace process. + */ +static int kernel_thread_fd(unsigned int pid) +{ + unsigned int flags = 0; + char buf[4096]; + int dir_fd, fd; + ssize_t rv; + + snprintf(buf, sizeof(buf), "/proc/%u", pid); + dir_fd = open(buf, O_RDONLY|O_DIRECTORY); + if (dir_fd == -1) + return -1; + + /* + * Believe it or not, struct task_struct::flags is directly exposed + * to userspace! + */ + fd = openat(dir_fd, "stat", O_RDONLY); + if (fd == -1) { + close(dir_fd); + return -1; + } + rv = read(fd, buf, sizeof(buf)); + close(fd); + if (0 < rv && rv <= sizeof(buf)) { + unsigned long long flags_ull; + char *p, *end; + int i; + + assert(buf[rv - 1] == '\n'); + buf[rv - 1] = '\0'; + + /* Search backwards: ->comm can contain whitespace and ')'. */ + for (i = 0; i < 43; i++) { + p = strrchr(buf, ' '); + assert(p); + *p = '\0'; + } + + p = strrchr(buf, ' '); + assert(p); + + flags_ull = xstrtoull(p + 1, &end); + assert(*end == '\0'); + assert(flags_ull == (unsigned int)flags_ull); + + flags = flags_ull; + } + + fd = -1; + if (flags & PF_KHTREAD) { + fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY); + } + close(dir_fd); + return fd; +} + +static void test_readdir(int fd) +{ + DIR *d; + struct dirent *de; + + d = fdopendir(fd); + assert(d); + + de = xreaddir(d); + assert(streq(de->d_name, ".")); + assert(de->d_type == DT_DIR); + + de = xreaddir(d); + assert(streq(de->d_name, "..")); + assert(de->d_type == DT_DIR); + + de = xreaddir(d); + assert(!de); +} + +static inline int sys_statx(int dirfd, const char *pathname, int flags, + unsigned int mask, void *stx) +{ + return syscall(SYS_statx, dirfd, pathname, flags, mask, stx); +} + +static void test_lookup_fail(int fd, const char *pathname) +{ + char stx[256] __attribute__((aligned(8))); + int rv; + + rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx); + assert(rv == -1 && errno == ENOENT); +} + +static void test_lookup(int fd) +{ + char buf[64]; + unsigned int u; + int i; + + for (i = INT_MIN; i < INT_MIN + 1024; i++) { + snprintf(buf, sizeof(buf), "%d", i); + test_lookup_fail(fd, buf); + } + for (i = -1024; i < 1024; i++) { + snprintf(buf, sizeof(buf), "%d", i); + test_lookup_fail(fd, buf); + } + for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) { + snprintf(buf, sizeof(buf), "%u", u); + test_lookup_fail(fd, buf); + } + for (u = UINT_MAX - 1024; u != 0; u++) { + snprintf(buf, sizeof(buf), "%u", u); + test_lookup_fail(fd, buf); + } +} + +int main(void) +{ + unsigned int pid; + int fd; + + /* + * In theory this will loop indefinitely if kernel threads are exiled + * from /proc. + * + * Start with kthreadd. + */ + pid = 2; + while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) { + pid++; + } + /* EACCES if run as non-root. */ + if (pid >= 1024) + return 1; + + test_readdir(fd); + test_lookup(fd); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h index 0e464b50e9d9..dc6a42b1d6b0 100644 --- a/tools/testing/selftests/proc/proc-uptime.h +++ b/tools/testing/selftests/proc/proc-uptime.h @@ -20,21 +20,7 @@ #include <stdlib.h> #include <unistd.h> -static unsigned long long xstrtoull(const char *p, char **end) -{ - if (*p == '0') { - *end = (char *)p + 1; - return 0; - } else if ('1' <= *p && *p <= '9') { - unsigned long long val; - - errno = 0; - val = strtoull(p, end, 10); - assert(errno == 0); - return val; - } else - assert(0); -} +#include "proc.h" static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) { diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h new file mode 100644 index 000000000000..4e178166fd84 --- /dev/null +++ b/tools/testing/selftests/proc/proc.h @@ -0,0 +1,39 @@ +#pragma once +#undef NDEBUG +#include <assert.h> +#include <dirent.h> +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +static inline bool streq(const char *s1, const char *s2) +{ + return strcmp(s1, s2) == 0; +} + +static unsigned long long xstrtoull(const char *p, char **end) +{ + if (*p == '0') { + *end = (char *)p + 1; + return 0; + } else if ('1' <= *p && *p <= '9') { + unsigned long long val; + + errno = 0; + val = strtoull(p, end, 10); + assert(errno == 0); + return val; + } else + assert(0); +} + +static struct dirent *xreaddir(DIR *d) +{ + struct dirent *de; + + errno = 0; + de = readdir(d); + assert(de || errno == 0); + return de; +} diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c index 1e73c2232097..563e752e6eba 100644 --- a/tools/testing/selftests/proc/read.c +++ b/tools/testing/selftests/proc/read.c @@ -31,22 +31,7 @@ #include <fcntl.h> #include <unistd.h> -static inline bool streq(const char *s1, const char *s2) -{ - return strcmp(s1, s2) == 0; -} - -static struct dirent *xreaddir(DIR *d) -{ - struct dirent *de; - - errno = 0; - de = readdir(d); - if (!de && errno != 0) { - exit(1); - } - return de; -} +#include "proc.h" static void f_reg(DIR *d, const char *filename) { diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests index 6ccb154cb4aa..22f8df1ad7d4 100755 --- a/tools/testing/selftests/pstore/pstore_post_reboot_tests +++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests @@ -7,13 +7,16 @@ # # Released under the terms of the GPL v2. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./common_tests if [ -e $REBOOT_FLAG ]; then rm $REBOOT_FLAG else prlog "pstore_crash_test has not been executed yet. we skip further tests." - exit 0 + exit $ksft_skip fi prlog -n "Mounting pstore filesystem ... " diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh new file mode 100755 index 000000000000..98f650c9bf54 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Invoke a text editor on all console.log files for all runs with diagnostics, +# that is, on all such files having a console.log.diags counterpart. +# Note that both console.log.diags and console.log are passed to the +# editor (currently defaulting to "vi"), allowing the user to get an +# idea of what to search for in the console.log file. +# +# Usage: kvm-find-errors.sh directory +# +# The "directory" above should end with the date/time directory, for example, +# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27". + +rundir="${1}" +if test -z "$rundir" -o ! -d "$rundir" +then + echo Usage: $0 directory +fi +editor=${EDITOR-vi} + +# Find builds with errors +files= +for i in ${rundir}/*/Make.out +do + if egrep -q "error:|warning:" < $i + then + egrep "error:|warning:" < $i > $i.diags + files="$files $i.diags $i" + fi +done +if test -n "$files" +then + $editor $files +else + echo No build errors. +fi +if grep -q -e "--buildonly" < ${rundir}/log +then + echo Build-only run, no console logs to check. +fi + +# Find console logs with errors +files= +for i in ${rundir}/*/console.log +do + if test -r $i.diags + then + files="$files $i.diags $i" + fi +done +if test -n "$files" +then + $editor $files +else + echo No errors in console logs. +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index c2e1bb6d0cba..477ecb1293ab 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -34,11 +34,15 @@ fi configfile=`echo $i | sed -e 's/^.*\///'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` +stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | + tail -1 | sed -e 's/^\[[ 0-9.]*] //' | + awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | + tr -d '\012\015'`" if test -z "$ngps" then - echo "$configfile -------" + echo "$configfile ------- " $stopstate else - title="$configfile ------- $ngps grace periods" + title="$configfile ------- $ngps GPs" dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` if test -z "$dur" then @@ -46,9 +50,9 @@ else else ngpsps=`awk -v ngps=$ngps -v dur=$dur ' BEGIN { print ngps / dur }' < /dev/null` - title="$title ($ngpsps per second)" + title="$title ($ngpsps/s)" fi - echo $title + echo $title $stopstate nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` if test -z "$nclosecalls" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index f7e988f369dd..c27e97824163 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -48,10 +48,6 @@ do cat $i/Make.oldconfig.err fi parse-build.sh $i/Make.out $configfile - if test "$TORTURE_SUITE" != rcuperf - then - parse-torture.sh $i/console.log $configfile - fi parse-console.sh $i/console.log $configfile if test -r $i/Warnings then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 5f8fbb0d7c17..c5b0f94341d9 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -267,5 +267,4 @@ then echo Unknown PID, cannot kill qemu command fi -parse-torture.sh $resdir/console.log $title parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 08aa7d50ae0e..17293436f551 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -24,57 +24,146 @@ # # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> +T=${TMPDIR-/tmp}/parse-console.sh.$$ file="$1" title="$2" +trap 'rm -f $T.seq $T.diags' 0 + . functions.sh +# Check for presence and readability of console output file +if test -f "$file" -a -r "$file" +then + : +else + echo $title unreadable console output file: $file + exit 1 +fi if grep -Pq '\x00' < $file then print_warning Console output contains nul bytes, old qemu still running? fi -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags -if test -s $1.diags +cat /dev/null > $file.diags + +# Check for proper termination, except that rcuperf runs don't indicate this. +if test "$TORTURE_SUITE" != rcuperf then - print_warning Assertion failure in $file $title - # cat $1.diags + # check for abject failure + + if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file + then + nerrs=`grep --binary-files=text '!!!' $file | + tail -1 | + awk ' + { + for (i=NF-8;i<=NF;i++) + sum+=$i; + } + END { print sum }'` + print_bug $title FAILURE, $nerrs instances + exit + fi + + grep --binary-files=text 'torture:.*ver:' $file | + egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | + sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | + awk ' + BEGIN { + ver = 0; + badseq = 0; + } + + { + if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { + badseqno1 = ver; + badseqno2 = $5; + badseqnr = NR; + badseq = 1; + } + ver = $5 + } + + END { + if (badseq) { + if (badseqno1 == badseqno2 && badseqno2 == ver) + print "GP HANG at " ver " torture stat " badseqnr; + else + print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr; + } + }' > $T.seq + + if grep -q SUCCESS $file + then + if test -s $T.seq + then + print_warning $title `cat $T.seq` + echo " " $file + exit 2 + fi + else + if grep -q "_HOTPLUG:" $file + then + print_warning HOTPLUG FAILURES $title `cat $T.seq` + echo " " $file + exit 3 + fi + echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages + if test -s $T.seq + then + print_warning $title `cat $T.seq` + fi + exit 2 + fi +fi | tee -a $file.diags + +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | +grep -v 'ODEBUG: ' | +grep -v 'Warning: unable to open an initial console' > $T.diags +if test -s $T.diags +then + print_warning "Assertion failure in $file $title" + # cat $T.diags summary="" - n_badness=`grep -c Badness $1` + n_badness=`grep -c Badness $file` if test "$n_badness" -ne 0 then summary="$summary Badness: $n_badness" fi - n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'` + n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'` if test "$n_warn" -ne 0 then summary="$summary Warnings: $n_warn" fi - n_bugs=`egrep -c 'BUG|Oops:' $1` + n_bugs=`egrep -c 'BUG|Oops:' $file` if test "$n_bugs" -ne 0 then summary="$summary Bugs: $n_bugs" fi - n_calltrace=`grep -c 'Call Trace:' $1` + n_calltrace=`grep -c 'Call Trace:' $file` if test "$n_calltrace" -ne 0 then summary="$summary Call Traces: $n_calltrace" fi - n_lockdep=`grep -c =========== $1` + n_lockdep=`grep -c =========== $file` if test "$n_badness" -ne 0 then summary="$summary lockdep: $n_badness" fi - n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` + n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file` if test "$n_stalls" -ne 0 then summary="$summary Stalls: $n_stalls" fi - n_starves=`grep -c 'rcu_.*kthread starved for' $1` + n_starves=`grep -c 'rcu_.*kthread starved for' $file` if test "$n_starves" -ne 0 then summary="$summary Starves: $n_starves" fi print_warning Summary: $summary -else - rm $1.diags + cat $T.diags >> $file.diags +fi +if ! test -s $file.diags +then + rm -f $file.diags fi diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh deleted file mode 100755 index 5987e50cfeb4..000000000000 --- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -# -# Check the console output from a torture run for goodness. -# The "file" is a pathname on the local system, and "title" is -# a text string for error-message purposes. -# -# The file must contain torture output, but can be interspersed -# with other dmesg text, as in console-log output. -# -# Usage: parse-torture.sh file title -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2011 -# -# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> - -T=${TMPDIR-/tmp}/parse-torture.sh.$$ -file="$1" -title="$2" - -trap 'rm -f $T.seq' 0 - -. functions.sh - -# check for presence of torture output file. - -if test -f "$file" -a -r "$file" -then - : -else - echo $title unreadable torture output file: $file - exit 1 -fi - -# check for abject failure - -if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file -then - nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` - print_bug $title FAILURE, $nerrs instances - echo " " $url - exit -fi - -grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | -awk ' -BEGIN { - ver = 0; - badseq = 0; - } - - { - if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { - badseqno1 = ver; - badseqno2 = $5; - badseqnr = NR; - badseq = 1; - } - ver = $5 - } - -END { - if (badseq) { - if (badseqno1 == badseqno2 && badseqno2 == ver) - print "GP HANG at " ver " torture stat " badseqnr; - else - print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr; - } - }' > $T.seq - -if grep -q SUCCESS $file -then - if test -s $T.seq - then - print_warning $title $title `cat $T.seq` - echo " " $file - exit 2 - fi -else - if grep -q "_HOTPLUG:" $file - then - print_warning HOTPLUG FAILURES $title `cat $T.seq` - echo " " $file - exit 3 - fi - echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages - if test -s $T.seq - then - print_warning $title `cat $T.seq` - fi - exit 2 -fi diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore new file mode 100644 index 000000000000..cc610da7e369 --- /dev/null +++ b/tools/testing/selftests/rseq/.gitignore @@ -0,0 +1,6 @@ +basic_percpu_ops_test +basic_test +basic_rseq_op_test +param_test +param_test_benchmark +param_test_compare_twice diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile new file mode 100644 index 000000000000..c30c52e1d0d2 --- /dev/null +++ b/tools/testing/selftests/rseq/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT +CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ +LDLIBS += -lpthread + +# Own dependencies because we only want to build against 1st prerequisite, but +# still track changes to header files and depend on shared object. +OVERRIDE_TARGETS = 1 + +TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \ + param_test_benchmark param_test_compare_twice + +TEST_GEN_PROGS_EXTENDED = librseq.so + +TEST_PROGS = run_param_test.sh + +include ../lib.mk + +$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h + $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ + +$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ + rseq.h rseq-*.h + $(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ + rseq.h rseq-*.h + $(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@ diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c new file mode 100644 index 000000000000..eb3f6db36d36 --- /dev/null +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stddef.h> + +#include "rseq.h" + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +struct percpu_lock_entry { + intptr_t v; +} __attribute__((aligned(128))); + +struct percpu_lock { + struct percpu_lock_entry c[CPU_SETSIZE]; +}; + +struct test_data_entry { + intptr_t count; +} __attribute__((aligned(128))); + +struct spinlock_test_data { + struct percpu_lock lock; + struct test_data_entry c[CPU_SETSIZE]; + int reps; +}; + +struct percpu_list_node { + intptr_t data; + struct percpu_list_node *next; +}; + +struct percpu_list_entry { + struct percpu_list_node *head; +} __attribute__((aligned(128))); + +struct percpu_list { + struct percpu_list_entry c[CPU_SETSIZE]; +}; + +/* A simple percpu spinlock. Returns the cpu lock was acquired on. */ +int rseq_this_cpu_lock(struct percpu_lock *lock) +{ + int cpu; + + for (;;) { + int ret; + + cpu = rseq_cpu_start(); + ret = rseq_cmpeqv_storev(&lock->c[cpu].v, + 0, 1, cpu); + if (rseq_likely(!ret)) + break; + /* Retry if comparison fails or rseq aborts. */ + } + /* + * Acquire semantic when taking lock after control dependency. + * Matches rseq_smp_store_release(). + */ + rseq_smp_acquire__after_ctrl_dep(); + return cpu; +} + +void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) +{ + assert(lock->c[cpu].v == 1); + /* + * Release lock, with release semantic. Matches + * rseq_smp_acquire__after_ctrl_dep(). + */ + rseq_smp_store_release(&lock->c[cpu].v, 0); +} + +void *test_percpu_spinlock_thread(void *arg) +{ + struct spinlock_test_data *data = arg; + int i, cpu; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + for (i = 0; i < data->reps; i++) { + cpu = rseq_this_cpu_lock(&data->lock); + data->c[cpu].count++; + rseq_percpu_unlock(&data->lock, cpu); + } + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + return NULL; +} + +/* + * A simple test which implements a sharded counter using a per-cpu + * lock. Obviously real applications might prefer to simply use a + * per-cpu increment; however, this is reasonable for a test and the + * lock can be extended to synchronize more complicated operations. + */ +void test_percpu_spinlock(void) +{ + const int num_threads = 200; + int i; + uint64_t sum; + pthread_t test_threads[num_threads]; + struct spinlock_test_data data; + + memset(&data, 0, sizeof(data)); + data.reps = 5000; + + for (i = 0; i < num_threads; i++) + pthread_create(&test_threads[i], NULL, + test_percpu_spinlock_thread, &data); + + for (i = 0; i < num_threads; i++) + pthread_join(test_threads[i], NULL); + + sum = 0; + for (i = 0; i < CPU_SETSIZE; i++) + sum += data.c[i].count; + + assert(sum == (uint64_t)data.reps * num_threads); +} + +void this_cpu_list_push(struct percpu_list *list, + struct percpu_list_node *node, + int *_cpu) +{ + int cpu; + + for (;;) { + intptr_t *targetptr, newval, expect; + int ret; + + cpu = rseq_cpu_start(); + /* Load list->c[cpu].head with single-copy atomicity. */ + expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); + newval = (intptr_t)node; + targetptr = (intptr_t *)&list->c[cpu].head; + node->next = (struct percpu_list_node *)expect; + ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); + if (rseq_likely(!ret)) + break; + /* Retry if comparison fails or rseq aborts. */ + } + if (_cpu) + *_cpu = cpu; +} + +/* + * Unlike a traditional lock-less linked list; the availability of a + * rseq primitive allows us to implement pop without concerns over + * ABA-type races. + */ +struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, + int *_cpu) +{ + for (;;) { + struct percpu_list_node *head; + intptr_t *targetptr, expectnot, *load; + off_t offset; + int ret, cpu; + + cpu = rseq_cpu_start(); + targetptr = (intptr_t *)&list->c[cpu].head; + expectnot = (intptr_t)NULL; + offset = offsetof(struct percpu_list_node, next); + load = (intptr_t *)&head; + ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, + offset, load, cpu); + if (rseq_likely(!ret)) { + if (_cpu) + *_cpu = cpu; + return head; + } + if (ret > 0) + return NULL; + /* Retry if rseq aborts. */ + } +} + +/* + * __percpu_list_pop is not safe against concurrent accesses. Should + * only be used on lists that are not concurrently modified. + */ +struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) +{ + struct percpu_list_node *node; + + node = list->c[cpu].head; + if (!node) + return NULL; + list->c[cpu].head = node->next; + return node; +} + +void *test_percpu_list_thread(void *arg) +{ + int i; + struct percpu_list *list = (struct percpu_list *)arg; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + for (i = 0; i < 100000; i++) { + struct percpu_list_node *node; + + node = this_cpu_list_pop(list, NULL); + sched_yield(); /* encourage shuffling */ + if (node) + this_cpu_list_push(list, node, NULL); + } + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + return NULL; +} + +/* Simultaneous modification to a per-cpu linked list from many threads. */ +void test_percpu_list(void) +{ + int i, j; + uint64_t sum = 0, expected_sum = 0; + struct percpu_list list; + pthread_t test_threads[200]; + cpu_set_t allowed_cpus; + + memset(&list, 0, sizeof(list)); + + /* Generate list entries for every usable cpu. */ + sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); + for (i = 0; i < CPU_SETSIZE; i++) { + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + for (j = 1; j <= 100; j++) { + struct percpu_list_node *node; + + expected_sum += j; + + node = malloc(sizeof(*node)); + assert(node); + node->data = j; + node->next = list.c[i].head; + list.c[i].head = node; + } + } + + for (i = 0; i < 200; i++) + pthread_create(&test_threads[i], NULL, + test_percpu_list_thread, &list); + + for (i = 0; i < 200; i++) + pthread_join(test_threads[i], NULL); + + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_list_node *node; + + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + + while ((node = __percpu_list_pop(&list, i))) { + sum += node->data; + free(node); + } + } + + /* + * All entries should now be accounted for (unless some external + * actor is interfering with our allowed affinity while this + * test is running). + */ + assert(sum == expected_sum); +} + +int main(int argc, char **argv) +{ + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + goto error; + } + printf("spinlock\n"); + test_percpu_spinlock(); + printf("percpu_list\n"); + test_percpu_list(); + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + goto error; + } + return 0; + +error: + return -1; +} diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c new file mode 100644 index 000000000000..d8efbfb89193 --- /dev/null +++ b/tools/testing/selftests/rseq/basic_test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Basic test coverage for critical regions and rseq_current_cpu(). + */ + +#define _GNU_SOURCE +#include <assert.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <sys/time.h> + +#include "rseq.h" + +void test_cpu_pointer(void) +{ + cpu_set_t affinity, test_affinity; + int i; + + sched_getaffinity(0, sizeof(affinity), &affinity); + CPU_ZERO(&test_affinity); + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, &affinity)) { + CPU_SET(i, &test_affinity); + sched_setaffinity(0, sizeof(test_affinity), + &test_affinity); + assert(sched_getcpu() == i); + assert(rseq_current_cpu() == i); + assert(rseq_current_cpu_raw() == i); + assert(rseq_cpu_start() == i); + CPU_CLR(i, &test_affinity); + } + } + sched_setaffinity(0, sizeof(affinity), &affinity); +} + +int main(int argc, char **argv) +{ + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + goto init_thread_error; + } + printf("testing current cpu\n"); + test_cpu_pointer(); + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + goto init_thread_error; + } + return 0; + +init_thread_error: + return -1; +} diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c new file mode 100644 index 000000000000..615252331813 --- /dev/null +++ b/tools/testing/selftests/rseq/param_test.c @@ -0,0 +1,1284 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> +#include <poll.h> +#include <sys/types.h> +#include <signal.h> +#include <errno.h> +#include <stddef.h> + +static inline pid_t gettid(void) +{ + return syscall(__NR_gettid); +} + +#define NR_INJECT 9 +static int loop_cnt[NR_INJECT + 1]; + +static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); +static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); +static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); +static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); +static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); +static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); + +static int opt_modulo, verbose; + +static int opt_yield, opt_signal, opt_sleep, + opt_disable_rseq, opt_threads = 200, + opt_disable_mod = 0, opt_test = 's', opt_mb = 0; + +#ifndef RSEQ_SKIP_FASTPATH +static long long opt_reps = 5000; +#else +static long long opt_reps = 100; +#endif + +static __thread __attribute__((tls_model("initial-exec"))) +unsigned int signals_delivered; + +#ifndef BENCHMARK + +static __thread __attribute__((tls_model("initial-exec"), unused)) +unsigned int yield_mod_cnt, nr_abort; + +#define printf_verbose(fmt, ...) \ + do { \ + if (verbose) \ + printf(fmt, ## __VA_ARGS__); \ + } while (0) + +#if defined(__x86_64__) || defined(__i386__) + +#define INJECT_ASM_REG "eax" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#ifdef __i386__ + +#define RSEQ_INJECT_ASM(n) \ + "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ + "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "dec %%" INJECT_ASM_REG "\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" + +#elif defined(__x86_64__) + +#define RSEQ_INJECT_ASM(n) \ + "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \ + "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ + "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "dec %%" INJECT_ASM_REG "\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" + +#else +#error "Unsupported architecture" +#endif + +#elif defined(__ARMEL__) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "r4" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "cmp " INJECT_ASM_REG ", #0\n\t" \ + "beq 333f\n\t" \ + "222:\n\t" \ + "subs " INJECT_ASM_REG ", #1\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" + +#elif __PPC__ + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "r18" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ + "beq 333f\n\t" \ + "222:\n\t" \ + "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" + +#elif defined(__mips__) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "$5" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "beqz " INJECT_ASM_REG ", 333f\n\t" \ + "222:\n\t" \ + "addiu " INJECT_ASM_REG ", -1\n\t" \ + "bnez " INJECT_ASM_REG ", 222b\n\t" \ + "333:\n\t" + +#else +#error unsupported target +#endif + +#define RSEQ_INJECT_FAILED \ + nr_abort++; + +#define RSEQ_INJECT_C(n) \ +{ \ + int loc_i, loc_nr_loops = loop_cnt[n]; \ + \ + for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ + rseq_barrier(); \ + } \ + if (loc_nr_loops == -1 && opt_modulo) { \ + if (yield_mod_cnt == opt_modulo - 1) { \ + if (opt_sleep > 0) \ + poll(NULL, 0, opt_sleep); \ + if (opt_yield) \ + sched_yield(); \ + if (opt_signal) \ + raise(SIGUSR1); \ + yield_mod_cnt = 0; \ + } else { \ + yield_mod_cnt++; \ + } \ + } \ +} + +#else + +#define printf_verbose(fmt, ...) + +#endif /* BENCHMARK */ + +#include "rseq.h" + +struct percpu_lock_entry { + intptr_t v; +} __attribute__((aligned(128))); + +struct percpu_lock { + struct percpu_lock_entry c[CPU_SETSIZE]; +}; + +struct test_data_entry { + intptr_t count; +} __attribute__((aligned(128))); + +struct spinlock_test_data { + struct percpu_lock lock; + struct test_data_entry c[CPU_SETSIZE]; +}; + +struct spinlock_thread_test_data { + struct spinlock_test_data *data; + long long reps; + int reg; +}; + +struct inc_test_data { + struct test_data_entry c[CPU_SETSIZE]; +}; + +struct inc_thread_test_data { + struct inc_test_data *data; + long long reps; + int reg; +}; + +struct percpu_list_node { + intptr_t data; + struct percpu_list_node *next; +}; + +struct percpu_list_entry { + struct percpu_list_node *head; +} __attribute__((aligned(128))); + +struct percpu_list { + struct percpu_list_entry c[CPU_SETSIZE]; +}; + +#define BUFFER_ITEM_PER_CPU 100 + +struct percpu_buffer_node { + intptr_t data; +}; + +struct percpu_buffer_entry { + intptr_t offset; + intptr_t buflen; + struct percpu_buffer_node **array; +} __attribute__((aligned(128))); + +struct percpu_buffer { + struct percpu_buffer_entry c[CPU_SETSIZE]; +}; + +#define MEMCPY_BUFFER_ITEM_PER_CPU 100 + +struct percpu_memcpy_buffer_node { + intptr_t data1; + uint64_t data2; +}; + +struct percpu_memcpy_buffer_entry { + intptr_t offset; + intptr_t buflen; + struct percpu_memcpy_buffer_node *array; +} __attribute__((aligned(128))); + +struct percpu_memcpy_buffer { + struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; +}; + +/* A simple percpu spinlock. Grabs lock on current cpu. */ +static int rseq_this_cpu_lock(struct percpu_lock *lock) +{ + int cpu; + + for (;;) { + int ret; + + cpu = rseq_cpu_start(); + ret = rseq_cmpeqv_storev(&lock->c[cpu].v, + 0, 1, cpu); + if (rseq_likely(!ret)) + break; + /* Retry if comparison fails or rseq aborts. */ + } + /* + * Acquire semantic when taking lock after control dependency. + * Matches rseq_smp_store_release(). + */ + rseq_smp_acquire__after_ctrl_dep(); + return cpu; +} + +static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) +{ + assert(lock->c[cpu].v == 1); + /* + * Release lock, with release semantic. Matches + * rseq_smp_acquire__after_ctrl_dep(). + */ + rseq_smp_store_release(&lock->c[cpu].v, 0); +} + +void *test_percpu_spinlock_thread(void *arg) +{ + struct spinlock_thread_test_data *thread_data = arg; + struct spinlock_test_data *data = thread_data->data; + long long i, reps; + + if (!opt_disable_rseq && thread_data->reg && + rseq_register_current_thread()) + abort(); + reps = thread_data->reps; + for (i = 0; i < reps; i++) { + int cpu = rseq_cpu_start(); + + cpu = rseq_this_cpu_lock(&data->lock); + data->c[cpu].count++; + rseq_percpu_unlock(&data->lock, cpu); +#ifndef BENCHMARK + if (i != 0 && !(i % (reps / 10))) + printf_verbose("tid %d: count %lld\n", (int) gettid(), i); +#endif + } + printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", + (int) gettid(), nr_abort, signals_delivered); + if (!opt_disable_rseq && thread_data->reg && + rseq_unregister_current_thread()) + abort(); + return NULL; +} + +/* + * A simple test which implements a sharded counter using a per-cpu + * lock. Obviously real applications might prefer to simply use a + * per-cpu increment; however, this is reasonable for a test and the + * lock can be extended to synchronize more complicated operations. + */ +void test_percpu_spinlock(void) +{ + const int num_threads = opt_threads; + int i, ret; + uint64_t sum; + pthread_t test_threads[num_threads]; + struct spinlock_test_data data; + struct spinlock_thread_test_data thread_data[num_threads]; + + memset(&data, 0, sizeof(data)); + for (i = 0; i < num_threads; i++) { + thread_data[i].reps = opt_reps; + if (opt_disable_mod <= 0 || (i % opt_disable_mod)) + thread_data[i].reg = 1; + else + thread_data[i].reg = 0; + thread_data[i].data = &data; + ret = pthread_create(&test_threads[i], NULL, + test_percpu_spinlock_thread, + &thread_data[i]); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(test_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + sum = 0; + for (i = 0; i < CPU_SETSIZE; i++) + sum += data.c[i].count; + + assert(sum == (uint64_t)opt_reps * num_threads); +} + +void *test_percpu_inc_thread(void *arg) +{ + struct inc_thread_test_data *thread_data = arg; + struct inc_test_data *data = thread_data->data; + long long i, reps; + + if (!opt_disable_rseq && thread_data->reg && + rseq_register_current_thread()) + abort(); + reps = thread_data->reps; + for (i = 0; i < reps; i++) { + int ret; + + do { + int cpu; + + cpu = rseq_cpu_start(); + ret = rseq_addv(&data->c[cpu].count, 1, cpu); + } while (rseq_unlikely(ret)); +#ifndef BENCHMARK + if (i != 0 && !(i % (reps / 10))) + printf_verbose("tid %d: count %lld\n", (int) gettid(), i); +#endif + } + printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", + (int) gettid(), nr_abort, signals_delivered); + if (!opt_disable_rseq && thread_data->reg && + rseq_unregister_current_thread()) + abort(); + return NULL; +} + +void test_percpu_inc(void) +{ + const int num_threads = opt_threads; + int i, ret; + uint64_t sum; + pthread_t test_threads[num_threads]; + struct inc_test_data data; + struct inc_thread_test_data thread_data[num_threads]; + + memset(&data, 0, sizeof(data)); + for (i = 0; i < num_threads; i++) { + thread_data[i].reps = opt_reps; + if (opt_disable_mod <= 0 || (i % opt_disable_mod)) + thread_data[i].reg = 1; + else + thread_data[i].reg = 0; + thread_data[i].data = &data; + ret = pthread_create(&test_threads[i], NULL, + test_percpu_inc_thread, + &thread_data[i]); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(test_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + sum = 0; + for (i = 0; i < CPU_SETSIZE; i++) + sum += data.c[i].count; + + assert(sum == (uint64_t)opt_reps * num_threads); +} + +void this_cpu_list_push(struct percpu_list *list, + struct percpu_list_node *node, + int *_cpu) +{ + int cpu; + + for (;;) { + intptr_t *targetptr, newval, expect; + int ret; + + cpu = rseq_cpu_start(); + /* Load list->c[cpu].head with single-copy atomicity. */ + expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); + newval = (intptr_t)node; + targetptr = (intptr_t *)&list->c[cpu].head; + node->next = (struct percpu_list_node *)expect; + ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); + if (rseq_likely(!ret)) + break; + /* Retry if comparison fails or rseq aborts. */ + } + if (_cpu) + *_cpu = cpu; +} + +/* + * Unlike a traditional lock-less linked list; the availability of a + * rseq primitive allows us to implement pop without concerns over + * ABA-type races. + */ +struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, + int *_cpu) +{ + struct percpu_list_node *node = NULL; + int cpu; + + for (;;) { + struct percpu_list_node *head; + intptr_t *targetptr, expectnot, *load; + off_t offset; + int ret; + + cpu = rseq_cpu_start(); + targetptr = (intptr_t *)&list->c[cpu].head; + expectnot = (intptr_t)NULL; + offset = offsetof(struct percpu_list_node, next); + load = (intptr_t *)&head; + ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, + offset, load, cpu); + if (rseq_likely(!ret)) { + node = head; + break; + } + if (ret > 0) + break; + /* Retry if rseq aborts. */ + } + if (_cpu) + *_cpu = cpu; + return node; +} + +/* + * __percpu_list_pop is not safe against concurrent accesses. Should + * only be used on lists that are not concurrently modified. + */ +struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) +{ + struct percpu_list_node *node; + + node = list->c[cpu].head; + if (!node) + return NULL; + list->c[cpu].head = node->next; + return node; +} + +void *test_percpu_list_thread(void *arg) +{ + long long i, reps; + struct percpu_list *list = (struct percpu_list *)arg; + + if (!opt_disable_rseq && rseq_register_current_thread()) + abort(); + + reps = opt_reps; + for (i = 0; i < reps; i++) { + struct percpu_list_node *node; + + node = this_cpu_list_pop(list, NULL); + if (opt_yield) + sched_yield(); /* encourage shuffling */ + if (node) + this_cpu_list_push(list, node, NULL); + } + + printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", + (int) gettid(), nr_abort, signals_delivered); + if (!opt_disable_rseq && rseq_unregister_current_thread()) + abort(); + + return NULL; +} + +/* Simultaneous modification to a per-cpu linked list from many threads. */ +void test_percpu_list(void) +{ + const int num_threads = opt_threads; + int i, j, ret; + uint64_t sum = 0, expected_sum = 0; + struct percpu_list list; + pthread_t test_threads[num_threads]; + cpu_set_t allowed_cpus; + + memset(&list, 0, sizeof(list)); + + /* Generate list entries for every usable cpu. */ + sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); + for (i = 0; i < CPU_SETSIZE; i++) { + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + for (j = 1; j <= 100; j++) { + struct percpu_list_node *node; + + expected_sum += j; + + node = malloc(sizeof(*node)); + assert(node); + node->data = j; + node->next = list.c[i].head; + list.c[i].head = node; + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(&test_threads[i], NULL, + test_percpu_list_thread, &list); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(test_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_list_node *node; + + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + + while ((node = __percpu_list_pop(&list, i))) { + sum += node->data; + free(node); + } + } + + /* + * All entries should now be accounted for (unless some external + * actor is interfering with our allowed affinity while this + * test is running). + */ + assert(sum == expected_sum); +} + +bool this_cpu_buffer_push(struct percpu_buffer *buffer, + struct percpu_buffer_node *node, + int *_cpu) +{ + bool result = false; + int cpu; + + for (;;) { + intptr_t *targetptr_spec, newval_spec; + intptr_t *targetptr_final, newval_final; + intptr_t offset; + int ret; + + cpu = rseq_cpu_start(); + offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); + if (offset == buffer->c[cpu].buflen) + break; + newval_spec = (intptr_t)node; + targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; + newval_final = offset + 1; + targetptr_final = &buffer->c[cpu].offset; + if (opt_mb) + ret = rseq_cmpeqv_trystorev_storev_release( + targetptr_final, offset, targetptr_spec, + newval_spec, newval_final, cpu); + else + ret = rseq_cmpeqv_trystorev_storev(targetptr_final, + offset, targetptr_spec, newval_spec, + newval_final, cpu); + if (rseq_likely(!ret)) { + result = true; + break; + } + /* Retry if comparison fails or rseq aborts. */ + } + if (_cpu) + *_cpu = cpu; + return result; +} + +struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, + int *_cpu) +{ + struct percpu_buffer_node *head; + int cpu; + + for (;;) { + intptr_t *targetptr, newval; + intptr_t offset; + int ret; + + cpu = rseq_cpu_start(); + /* Load offset with single-copy atomicity. */ + offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); + if (offset == 0) { + head = NULL; + break; + } + head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); + newval = offset - 1; + targetptr = (intptr_t *)&buffer->c[cpu].offset; + ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, + (intptr_t *)&buffer->c[cpu].array[offset - 1], + (intptr_t)head, newval, cpu); + if (rseq_likely(!ret)) + break; + /* Retry if comparison fails or rseq aborts. */ + } + if (_cpu) + *_cpu = cpu; + return head; +} + +/* + * __percpu_buffer_pop is not safe against concurrent accesses. Should + * only be used on buffers that are not concurrently modified. + */ +struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, + int cpu) +{ + struct percpu_buffer_node *head; + intptr_t offset; + + offset = buffer->c[cpu].offset; + if (offset == 0) + return NULL; + head = buffer->c[cpu].array[offset - 1]; + buffer->c[cpu].offset = offset - 1; + return head; +} + +void *test_percpu_buffer_thread(void *arg) +{ + long long i, reps; + struct percpu_buffer *buffer = (struct percpu_buffer *)arg; + + if (!opt_disable_rseq && rseq_register_current_thread()) + abort(); + + reps = opt_reps; + for (i = 0; i < reps; i++) { + struct percpu_buffer_node *node; + + node = this_cpu_buffer_pop(buffer, NULL); + if (opt_yield) + sched_yield(); /* encourage shuffling */ + if (node) { + if (!this_cpu_buffer_push(buffer, node, NULL)) { + /* Should increase buffer size. */ + abort(); + } + } + } + + printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", + (int) gettid(), nr_abort, signals_delivered); + if (!opt_disable_rseq && rseq_unregister_current_thread()) + abort(); + + return NULL; +} + +/* Simultaneous modification to a per-cpu buffer from many threads. */ +void test_percpu_buffer(void) +{ + const int num_threads = opt_threads; + int i, j, ret; + uint64_t sum = 0, expected_sum = 0; + struct percpu_buffer buffer; + pthread_t test_threads[num_threads]; + cpu_set_t allowed_cpus; + + memset(&buffer, 0, sizeof(buffer)); + + /* Generate list entries for every usable cpu. */ + sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); + for (i = 0; i < CPU_SETSIZE; i++) { + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + /* Worse-case is every item in same CPU. */ + buffer.c[i].array = + malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * + BUFFER_ITEM_PER_CPU); + assert(buffer.c[i].array); + buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; + for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { + struct percpu_buffer_node *node; + + expected_sum += j; + + /* + * We could theoretically put the word-sized + * "data" directly in the buffer. However, we + * want to model objects that would not fit + * within a single word, so allocate an object + * for each node. + */ + node = malloc(sizeof(*node)); + assert(node); + node->data = j; + buffer.c[i].array[j - 1] = node; + buffer.c[i].offset++; + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(&test_threads[i], NULL, + test_percpu_buffer_thread, &buffer); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(test_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_buffer_node *node; + + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + + while ((node = __percpu_buffer_pop(&buffer, i))) { + sum += node->data; + free(node); + } + free(buffer.c[i].array); + } + + /* + * All entries should now be accounted for (unless some external + * actor is interfering with our allowed affinity while this + * test is running). + */ + assert(sum == expected_sum); +} + +bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, + struct percpu_memcpy_buffer_node item, + int *_cpu) +{ + bool result = false; + int cpu; + + for (;;) { + intptr_t *targetptr_final, newval_final, offset; + char *destptr, *srcptr; + size_t copylen; + int ret; + + cpu = rseq_cpu_start(); + /* Load offset with single-copy atomicity. */ + offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); + if (offset == buffer->c[cpu].buflen) + break; + destptr = (char *)&buffer->c[cpu].array[offset]; + srcptr = (char *)&item; + /* copylen must be <= 4kB. */ + copylen = sizeof(item); + newval_final = offset + 1; + targetptr_final = &buffer->c[cpu].offset; + if (opt_mb) + ret = rseq_cmpeqv_trymemcpy_storev_release( + targetptr_final, offset, + destptr, srcptr, copylen, + newval_final, cpu); + else + ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, + offset, destptr, srcptr, copylen, + newval_final, cpu); + if (rseq_likely(!ret)) { + result = true; + break; + } + /* Retry if comparison fails or rseq aborts. */ + } + if (_cpu) + *_cpu = cpu; + return result; +} + +bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, + struct percpu_memcpy_buffer_node *item, + int *_cpu) +{ + bool result = false; + int cpu; + + for (;;) { + intptr_t *targetptr_final, newval_final, offset; + char *destptr, *srcptr; + size_t copylen; + int ret; + + cpu = rseq_cpu_start(); + /* Load offset with single-copy atomicity. */ + offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); + if (offset == 0) + break; + destptr = (char *)item; + srcptr = (char *)&buffer->c[cpu].array[offset - 1]; + /* copylen must be <= 4kB. */ + copylen = sizeof(*item); + newval_final = offset - 1; + targetptr_final = &buffer->c[cpu].offset; + ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, + offset, destptr, srcptr, copylen, + newval_final, cpu); + if (rseq_likely(!ret)) { + result = true; + break; + } + /* Retry if comparison fails or rseq aborts. */ + } + if (_cpu) + *_cpu = cpu; + return result; +} + +/* + * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should + * only be used on buffers that are not concurrently modified. + */ +bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, + struct percpu_memcpy_buffer_node *item, + int cpu) +{ + intptr_t offset; + + offset = buffer->c[cpu].offset; + if (offset == 0) + return false; + memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); + buffer->c[cpu].offset = offset - 1; + return true; +} + +void *test_percpu_memcpy_buffer_thread(void *arg) +{ + long long i, reps; + struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; + + if (!opt_disable_rseq && rseq_register_current_thread()) + abort(); + + reps = opt_reps; + for (i = 0; i < reps; i++) { + struct percpu_memcpy_buffer_node item; + bool result; + + result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); + if (opt_yield) + sched_yield(); /* encourage shuffling */ + if (result) { + if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { + /* Should increase buffer size. */ + abort(); + } + } + } + + printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", + (int) gettid(), nr_abort, signals_delivered); + if (!opt_disable_rseq && rseq_unregister_current_thread()) + abort(); + + return NULL; +} + +/* Simultaneous modification to a per-cpu buffer from many threads. */ +void test_percpu_memcpy_buffer(void) +{ + const int num_threads = opt_threads; + int i, j, ret; + uint64_t sum = 0, expected_sum = 0; + struct percpu_memcpy_buffer buffer; + pthread_t test_threads[num_threads]; + cpu_set_t allowed_cpus; + + memset(&buffer, 0, sizeof(buffer)); + + /* Generate list entries for every usable cpu. */ + sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); + for (i = 0; i < CPU_SETSIZE; i++) { + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + /* Worse-case is every item in same CPU. */ + buffer.c[i].array = + malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * + MEMCPY_BUFFER_ITEM_PER_CPU); + assert(buffer.c[i].array); + buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; + for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { + expected_sum += 2 * j + 1; + + /* + * We could theoretically put the word-sized + * "data" directly in the buffer. However, we + * want to model objects that would not fit + * within a single word, so allocate an object + * for each node. + */ + buffer.c[i].array[j - 1].data1 = j; + buffer.c[i].array[j - 1].data2 = j + 1; + buffer.c[i].offset++; + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(&test_threads[i], NULL, + test_percpu_memcpy_buffer_thread, + &buffer); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(test_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_memcpy_buffer_node item; + + if (!CPU_ISSET(i, &allowed_cpus)) + continue; + + while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { + sum += item.data1; + sum += item.data2; + } + free(buffer.c[i].array); + } + + /* + * All entries should now be accounted for (unless some external + * actor is interfering with our allowed affinity while this + * test is running). + */ + assert(sum == expected_sum); +} + +static void test_signal_interrupt_handler(int signo) +{ + signals_delivered++; +} + +static int set_signal_handler(void) +{ + int ret = 0; + struct sigaction sa; + sigset_t sigset; + + ret = sigemptyset(&sigset); + if (ret < 0) { + perror("sigemptyset"); + return ret; + } + + sa.sa_handler = test_signal_interrupt_handler; + sa.sa_mask = sigset; + sa.sa_flags = 0; + ret = sigaction(SIGUSR1, &sa, NULL); + if (ret < 0) { + perror("sigaction"); + return ret; + } + + printf_verbose("Signal handler set for SIGUSR1\n"); + + return ret; +} + +static void show_usage(int argc, char **argv) +{ + printf("Usage : %s <OPTIONS>\n", + argv[0]); + printf("OPTIONS:\n"); + printf(" [-1 loops] Number of loops for delay injection 1\n"); + printf(" [-2 loops] Number of loops for delay injection 2\n"); + printf(" [-3 loops] Number of loops for delay injection 3\n"); + printf(" [-4 loops] Number of loops for delay injection 4\n"); + printf(" [-5 loops] Number of loops for delay injection 5\n"); + printf(" [-6 loops] Number of loops for delay injection 6\n"); + printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); + printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); + printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); + printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); + printf(" [-y] Yield\n"); + printf(" [-k] Kill thread with signal\n"); + printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); + printf(" [-t N] Number of threads (default 200)\n"); + printf(" [-r N] Number of repetitions per thread (default 5000)\n"); + printf(" [-d] Disable rseq system call (no initialization)\n"); + printf(" [-D M] Disable rseq for each M threads\n"); + printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); + printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); + printf(" [-v] Verbose output.\n"); + printf(" [-h] Show this help.\n"); + printf("\n"); +} + +int main(int argc, char **argv) +{ + int i; + + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-') + continue; + switch (argv[i][1]) { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (argc < i + 2) { + show_usage(argc, argv); + goto error; + } + loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); + i++; + break; + case 'm': + if (argc < i + 2) { + show_usage(argc, argv); + goto error; + } + opt_modulo = atol(argv[i + 1]); + if (opt_modulo < 0) { + show_usage(argc, argv); + goto error; + } + i++; + break; + case 's': + if (argc < i + 2) { + show_usage(argc, argv); + goto error; + } + opt_sleep = atol(argv[i + 1]); + if (opt_sleep < 0) { + show_usage(argc, argv); + goto error; + } + i++; + break; + case 'y': + opt_yield = 1; + break; + case 'k': + opt_signal = 1; + break; + case 'd': + opt_disable_rseq = 1; + break; + case 'D': + if (argc < i + 2) { + show_usage(argc, argv); + goto error; + } + opt_disable_mod = atol(argv[i + 1]); + if (opt_disable_mod < 0) { + show_usage(argc, argv); + goto error; + } + i++; + break; + case 't': + if (argc < i + 2) { + show_usage(argc, argv); + goto error; + } + opt_threads = atol(argv[i + 1]); + if (opt_threads < 0) { + show_usage(argc, argv); + goto error; + } + i++; + break; + case 'r': + if (argc < i + 2) { + show_usage(argc, argv); + goto error; + } + opt_reps = atoll(argv[i + 1]); + if (opt_reps < 0) { + show_usage(argc, argv); + goto error; + } + i++; + break; + case 'h': + show_usage(argc, argv); + goto end; + case 'T': + if (argc < i + 2) { + show_usage(argc, argv); + goto error; + } + opt_test = *argv[i + 1]; + switch (opt_test) { + case 's': + case 'l': + case 'i': + case 'b': + case 'm': + break; + default: + show_usage(argc, argv); + goto error; + } + i++; + break; + case 'v': + verbose = 1; + break; + case 'M': + opt_mb = 1; + break; + default: + show_usage(argc, argv); + goto error; + } + } + + loop_cnt_1 = loop_cnt[1]; + loop_cnt_2 = loop_cnt[2]; + loop_cnt_3 = loop_cnt[3]; + loop_cnt_4 = loop_cnt[4]; + loop_cnt_5 = loop_cnt[5]; + loop_cnt_6 = loop_cnt[6]; + + if (set_signal_handler()) + goto error; + + if (!opt_disable_rseq && rseq_register_current_thread()) + goto error; + switch (opt_test) { + case 's': + printf_verbose("spinlock\n"); + test_percpu_spinlock(); + break; + case 'l': + printf_verbose("linked list\n"); + test_percpu_list(); + break; + case 'b': + printf_verbose("buffer\n"); + test_percpu_buffer(); + break; + case 'm': + printf_verbose("memcpy buffer\n"); + test_percpu_memcpy_buffer(); + break; + case 'i': + printf_verbose("counter increment\n"); + test_percpu_inc(); + break; + } + if (!opt_disable_rseq && rseq_unregister_current_thread()) + abort(); +end: + return 0; + +error: + return -1; +} diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h new file mode 100644 index 000000000000..3cea19877227 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -0,0 +1,716 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-arm.h + * + * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#define RSEQ_SIG 0x53053053 + +#define rseq_smp_mb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") +#define rseq_smp_rmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") +#define rseq_smp_wmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_smp_mb(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_smp_mb(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "adr r0, " __rseq_str(cs_label) "\n\t" \ + "str r0, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "ldr r0, %[" __rseq_str(current_cpu_id) "]\n\t" \ + "cmp %[" __rseq_str(cpu_id) "], r0\n\t" \ + "bne " __rseq_str(label) "\n\t" + +#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \ + abort_label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".balign 32\n\t" \ + __rseq_str(table_label) ":\n\t" \ + ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \ + ".word " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(abort_label) "]\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \ + start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \ + abort_label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(cmpfail_label) "]\n\t" + +#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expectnot], r0\n\t" + "beq %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expectnot], r0\n\t" + "beq %l[error2]\n\t" +#endif + "str r0, %[load]\n\t" + "add r0, %[voffp]\n\t" + "ldr r0, [r0]\n\t" + /* final store */ + "str r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "Ir" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + "ldr r0, %[v]\n\t" + "add r0, %[count]\n\t" + /* final store */ + "str r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [count] "Ir" (count) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" +#endif + /* try store */ + "str %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" +#endif + /* try store */ + "str %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + "dmb\n\t" /* full mb provides store-release */ + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + "ldr r0, %[v2]\n\t" + "cmp %[expect2], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" + "ldr r0, %[v2]\n\t" + "cmp %[expect2], r0\n\t" + "bne %l[error3]\n\t" +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint32_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + "str %[src], %[rseq_scratch0]\n\t" + "str %[dst], %[rseq_scratch1]\n\t" + "str %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne 7f\n\t" +#endif + /* try memcpy */ + "cmp %[len], #0\n\t" \ + "beq 333f\n\t" \ + "222:\n\t" \ + "ldrb %%r0, [%[src]]\n\t" \ + "strb %%r0, [%[dst]]\n\t" \ + "adds %[src], #1\n\t" \ + "adds %[dst], #1\n\t" \ + "subs %[len], #1\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint32_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + "str %[src], %[rseq_scratch0]\n\t" + "str %[dst], %[rseq_scratch1]\n\t" + "str %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne 7f\n\t" +#endif + /* try memcpy */ + "cmp %[len], #0\n\t" \ + "beq 333f\n\t" \ + "222:\n\t" \ + "ldrb %%r0, [%[src]]\n\t" \ + "strb %%r0, [%[dst]]\n\t" \ + "adds %[src], #1\n\t" \ + "adds %[dst], #1\n\t" \ + "subs %[len], #1\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + "dmb\n\t" /* full mb provides store-release */ + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h new file mode 100644 index 000000000000..7f48ecf46994 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -0,0 +1,725 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Author: Paul Burton <paul.burton@mips.com> + * (C) Copyright 2018 MIPS Tech LLC + * + * Based on rseq-arm.h: + * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#define RSEQ_SIG 0x53053053 + +#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory") +#define rseq_smp_rmb() rseq_smp_mb() +#define rseq_smp_wmb() rseq_smp_mb() + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_smp_mb(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_smp_mb(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#if _MIPS_SZLONG == 64 +# define LONG ".dword" +# define LONG_LA "dla" +# define LONG_L "ld" +# define LONG_S "sd" +# define LONG_ADDI "daddiu" +# define U32_U64_PAD(x) x +#elif _MIPS_SZLONG == 32 +# define LONG ".word" +# define LONG_LA "la" +# define LONG_L "lw" +# define LONG_S "sw" +# define LONG_ADDI "addiu" +# ifdef __BIG_ENDIAN +# define U32_U64_PAD(x) "0x0, " x +# else +# define U32_U64_PAD(x) x ", 0x0" +# endif +#else +# error unsupported _MIPS_SZLONG +#endif + +#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \ + LONG_S " $4, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "lw $4, %[" __rseq_str(current_cpu_id) "]\n\t" \ + "bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t" + +#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \ + abort_label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".balign 32\n\t" \ + __rseq_str(table_label) ":\n\t" \ + ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \ + ".word " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(abort_label) "]\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \ + start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \ + abort_label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(cmpfail_label) "]\n\t" + +#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[error2]\n\t" +#endif + LONG_S " $4, %[load]\n\t" + LONG_ADDI " $4, %[voffp]\n\t" + LONG_L " $4, 0($4)\n\t" + /* final store */ + LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "Ir" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + LONG_L " $4, %[v]\n\t" + LONG_ADDI " $4, %[count]\n\t" + /* final store */ + LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [count] "Ir" (count) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + "sync\n\t" /* full sync provides store-release */ + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" + LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[error3]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uintptr_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 7f\n\t" +#endif + /* try memcpy */ + "beqz %[len], 333f\n\t" \ + "222:\n\t" \ + "lb $4, 0(%[src])\n\t" \ + "sb $4, 0(%[dst])\n\t" \ + LONG_ADDI " %[src], 1\n\t" \ + LONG_ADDI " %[dst], 1\n\t" \ + LONG_ADDI " %[len], -1\n\t" \ + "bnez %[len], 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uintptr_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 7f\n\t" +#endif + /* try memcpy */ + "beqz %[len], 333f\n\t" \ + "222:\n\t" \ + "lb $4, 0(%[src])\n\t" \ + "sb $4, 0(%[dst])\n\t" \ + LONG_ADDI " %[src], 1\n\t" \ + LONG_ADDI " %[dst], 1\n\t" \ + LONG_ADDI " %[len], -1\n\t" \ + "bnez %[len], 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + "sync\n\t" /* full sync provides store-release */ + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h new file mode 100644 index 000000000000..52630c9f42be --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -0,0 +1,671 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-ppc.h + * + * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com> + */ + +#define RSEQ_SIG 0x53053053 + +#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory", "cc") +#define rseq_smp_lwsync() __asm__ __volatile__ ("lwsync" ::: "memory", "cc") +#define rseq_smp_rmb() rseq_smp_lwsync() +#define rseq_smp_wmb() rseq_smp_lwsync() + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_smp_lwsync(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_lwsync() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_smp_lwsync(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +/* + * The __rseq_table section can be used by debuggers to better handle + * single-stepping through the restartable critical sections. + */ + +#ifdef __PPC64__ + +#define STORE_WORD "std " +#define LOAD_WORD "ld " +#define LOADX_WORD "ldx " +#define CMP_WORD "cmpd " + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "lis %%r17, (" __rseq_str(cs_label) ")@highest\n\t" \ + "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@higher\n\t" \ + "rldicr %%r17, %%r17, 32, 31\n\t" \ + "oris %%r17, %%r17, (" __rseq_str(cs_label) ")@high\n\t" \ + "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ + "std %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#else /* #ifdef __PPC64__ */ + +#define STORE_WORD "stw " +#define LOAD_WORD "lwz " +#define LOADX_WORD "lwzx " +#define CMP_WORD "cmpw " + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + /* 32-bit only supported on BE */ \ + ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \ + "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ + "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#endif /* #ifdef __PPC64__ */ + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ + "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \ + "bne- cr7, " __rseq_str(label) "\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + ".long " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + "b %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +/* + * RSEQ_ASM_OPs: asm operations for rseq + * RSEQ_ASM_OP_R_*: has hard-code registers in it + * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7) + */ +#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ + LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ + CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ + "bne- cr7, " __rseq_str(label) "\n\t" + +#define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \ + LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ + CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ + "beq- cr7, " __rseq_str(label) "\n\t" + +#define RSEQ_ASM_OP_STORE(value, var) \ + STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" + +/* Load @var to r17 */ +#define RSEQ_ASM_OP_R_LOAD(var) \ + LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + +/* Store r17 to @var */ +#define RSEQ_ASM_OP_R_STORE(var) \ + STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + +/* Add @count to r17 */ +#define RSEQ_ASM_OP_R_ADD(count) \ + "add %%r17, %[" __rseq_str(count) "], %%r17\n\t" + +/* Load (r17 + voffp) to r17 */ +#define RSEQ_ASM_OP_R_LOADX(voffp) \ + LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" + +/* TODO: implement a faster memcpy. */ +#define RSEQ_ASM_OP_R_MEMCPY() \ + "cmpdi %%r19, 0\n\t" \ + "beq 333f\n\t" \ + "addi %%r20, %%r20, -1\n\t" \ + "addi %%r21, %%r21, -1\n\t" \ + "222:\n\t" \ + "lbzu %%r18, 1(%%r20)\n\t" \ + "stbu %%r18, 1(%%r21)\n\t" \ + "addi %%r19, %%r19, -1\n\t" \ + "cmpdi %%r19, 0\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ + __rseq_str(post_commit_label) ":\n\t" + +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ + STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ + __rseq_str(post_commit_label) ":\n\t" + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v not equal to @expectnot */ + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v not equal to @expectnot */ + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2]) +#endif + /* load the value of @v */ + RSEQ_ASM_OP_R_LOAD(v) + /* store it in @load */ + RSEQ_ASM_OP_R_STORE(load) + /* dereference voffp(v) */ + RSEQ_ASM_OP_R_LOADX(voffp) + /* final store the value at voffp(v) */ + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "b" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + /* load the value of @v */ + RSEQ_ASM_OP_R_LOAD(v) + /* add @count to it */ + RSEQ_ASM_OP_R_ADD(count) + /* final store */ + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* try store */ + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* try store */ + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + /* for 'release' */ + "lwsync\n\t" + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) + /* cmp @v2 equal to @expct2 */ + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail]) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) + /* cmp @v2 equal to @expct2 */ + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* setup for mempcy */ + "mr %%r19, %[len]\n\t" + "mr %%r20, %[src]\n\t" + "mr %%r21, %[dst]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* try memcpy */ + RSEQ_ASM_OP_R_MEMCPY() + RSEQ_INJECT_ASM(5) + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + /* teardown */ + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17", "r18", "r19", "r20", "r21" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* setup for mempcy */ + "mr %%r19, %[len]\n\t" + "mr %%r20, %[src]\n\t" + "mr %%r21, %[dst]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* try memcpy */ + RSEQ_ASM_OP_R_MEMCPY() + RSEQ_INJECT_ASM(5) + /* for 'release' */ + "lwsync\n\t" + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + /* teardown */ + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17", "r18", "r19", "r20", "r21" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#undef STORE_WORD +#undef LOAD_WORD +#undef LOADX_WORD +#undef CMP_WORD + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h new file mode 100644 index 000000000000..72750b5905a9 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-skip.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-skip.h + * + * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + return -1; +} diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h new file mode 100644 index 000000000000..089410a314e9 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -0,0 +1,1132 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-x86.h + * + * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#include <stdint.h> + +#define RSEQ_SIG 0x53053053 + +#ifdef __x86_64__ + +#define rseq_smp_mb() \ + __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") +#define rseq_smp_rmb() rseq_barrier() +#define rseq_smp_wmb() rseq_barrier() + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_barrier(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_barrier(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \ + "movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \ + "jnz " __rseq_str(label) "\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + /* Disassembler-friendly signature: nopl <sig>(%rip). */\ + ".byte 0x0f, 0x1f, 0x05\n\t" \ + ".long " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jmp %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \ + ".popsection\n\t" + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "cmpq %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "movq %[v], %%rbx\n\t" + "cmpq %%rbx, %[expectnot]\n\t" + "je %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "movq %[v], %%rbx\n\t" + "cmpq %%rbx, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + "movq %%rbx, %[load]\n\t" + "addq %[voffp], %%rbx\n\t" + "movq (%%rbx), %%rbx\n\t" + /* final store */ + "movq %%rbx, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "er" (voffp), + [load] "m" (*load) + : "memory", "cc", "rax", "rbx" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + /* final store */ + "addq %[count], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [count] "er" (count) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "cmpq %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + "movq %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* x86-64 is TSO. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu); +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + "cmpq %[v2], %[expect2]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "cmpq %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" + "cmpq %[v2], %[expect2]\n\t" + "jnz %l[error3]\n\t" +#endif + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint64_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + "movq %[src], %[rseq_scratch0]\n\t" + "movq %[dst], %[rseq_scratch1]\n\t" + "movq %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + "cmpq %[v], %[expect]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + "test %[len], %[len]\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "movb (%[src]), %%al\n\t" \ + "movb %%al, (%[dst])\n\t" \ + "inc %[src]\n\t" \ + "inc %[dst]\n\t" \ + "dec %[len]\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* x86-64 is TSO. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len, + newv, cpu); +} + +#endif /* !RSEQ_SKIP_FASTPATH */ + +#elif __i386__ + +#define rseq_smp_mb() \ + __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") +#define rseq_smp_rmb() \ + __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") +#define rseq_smp_wmb() \ + __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_smp_mb(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_smp_mb(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +/* + * Use eax as scratch register and take memory operands as input to + * lessen register pressure. Especially needed when compiling in O0. + */ +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t" \ + __rseq_str(label) ":\n\t" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \ + "jnz " __rseq_str(label) "\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + /* Disassembler-friendly signature: nopl <sig>. */ \ + ".byte 0x0f, 0x1f, 0x05\n\t" \ + ".long " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jmp %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \ + ".popsection\n\t" + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "cmpl %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "cmpl %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + "movl %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "movl %[v], %%ebx\n\t" + "cmpl %%ebx, %[expectnot]\n\t" + "je %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "movl %[v], %%ebx\n\t" + "cmpl %%ebx, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + "movl %%ebx, %[load]\n\t" + "addl %[voffp], %%ebx\n\t" + "movl (%%ebx), %%ebx\n\t" + /* final store */ + "movl %%ebx, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "ir" (voffp), + [load] "m" (*load) + : "memory", "cc", "eax", "ebx" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + /* final store */ + "addl %[count], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [count] "ir" (count) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "cmpl %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "cmpl %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + "movl %[newv2], %%eax\n\t" + "movl %%eax, %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + "movl %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "m" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "movl %[expect], %%eax\n\t" + "cmpl %[v], %%eax\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "movl %[expect], %%eax\n\t" + "cmpl %[v], %%eax\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + "movl %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + "lock; addl $0,-128(%%esp)\n\t" + /* final store */ + "movl %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "m" (expect), + [newv] "r" (newv) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif + +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "cmpl %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + "cmpl %[expect2], %[v2]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "cmpl %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" + "cmpl %[expect2], %[v2]\n\t" + "jnz %l[error3]\n\t" +#endif + "movl %[newv], %%eax\n\t" + /* final store */ + "movl %%eax, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "m" (newv) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +/* TODO: implement a faster memcpy. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint32_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + "movl %[src], %[rseq_scratch0]\n\t" + "movl %[dst], %[rseq_scratch1]\n\t" + "movl %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "movl %[expect], %%eax\n\t" + "cmpl %%eax, %[v]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + "movl %[expect], %%eax\n\t" + "cmpl %%eax, %[v]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + "test %[len], %[len]\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "movb (%[src]), %%al\n\t" \ + "movb %%al, (%[dst])\n\t" \ + "inc %[src]\n\t" \ + "inc %[dst]\n\t" \ + "dec %[len]\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + "movl %[newv], %%eax\n\t" + /* final store */ + "movl %%eax, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "m" (expect), + [newv] "m" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* TODO: implement a faster memcpy. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint32_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + "movl %[src], %[rseq_scratch0]\n\t" + "movl %[dst], %[rseq_scratch1]\n\t" + "movl %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "movl %[expect], %%eax\n\t" + "cmpl %%eax, %[v]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + "movl %[expect], %%eax\n\t" + "cmpl %%eax, %[v]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + "test %[len], %[len]\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "movb (%[src]), %%al\n\t" \ + "movb %%al, (%[dst])\n\t" \ + "inc %[src]\n\t" \ + "inc %[dst]\n\t" \ + "dec %[len]\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + "lock; addl $0,-128(%%esp)\n\t" + "movl %[newv], %%eax\n\t" + /* final store */ + "movl %%eax, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "m" (expect), + [newv] "m" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ + +#endif diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c new file mode 100644 index 000000000000..4847e97ed049 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * rseq.c + * + * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <syscall.h> +#include <assert.h> +#include <signal.h> + +#include "rseq.h" + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +__attribute__((tls_model("initial-exec"))) __thread +volatile struct rseq __rseq_abi = { + .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, +}; + +static __attribute__((tls_model("initial-exec"))) __thread +volatile int refcount; + +static void signal_off_save(sigset_t *oldset) +{ + sigset_t set; + int ret; + + sigfillset(&set); + ret = pthread_sigmask(SIG_BLOCK, &set, oldset); + if (ret) + abort(); +} + +static void signal_restore(sigset_t oldset) +{ + int ret; + + ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); + if (ret) + abort(); +} + +static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, + int flags, uint32_t sig) +{ + return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); +} + +int rseq_register_current_thread(void) +{ + int rc, ret = 0; + sigset_t oldset; + + signal_off_save(&oldset); + if (refcount++) + goto end; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); + if (!rc) { + assert(rseq_current_cpu_raw() >= 0); + goto end; + } + if (errno != EBUSY) + __rseq_abi.cpu_id = -2; + ret = -1; + refcount--; +end: + signal_restore(oldset); + return ret; +} + +int rseq_unregister_current_thread(void) +{ + int rc, ret = 0; + sigset_t oldset; + + signal_off_save(&oldset); + if (--refcount) + goto end; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), + RSEQ_FLAG_UNREGISTER, RSEQ_SIG); + if (!rc) + goto end; + ret = -1; +end: + signal_restore(oldset); + return ret; +} + +int32_t rseq_fallback_current_cpu(void) +{ + int32_t cpu; + + cpu = sched_getcpu(); + if (cpu < 0) { + perror("sched_getcpu()"); + abort(); + } + return cpu; +} diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h new file mode 100644 index 000000000000..86ce22417e0d --- /dev/null +++ b/tools/testing/selftests/rseq/rseq.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq.h + * + * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#ifndef RSEQ_H +#define RSEQ_H + +#include <stdint.h> +#include <stdbool.h> +#include <pthread.h> +#include <signal.h> +#include <sched.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <sched.h> +#include <linux/rseq.h> + +/* + * Empty code injection macros, override when testing. + * It is important to consider that the ASM injection macros need to be + * fully reentrant (e.g. do not modify the stack). + */ +#ifndef RSEQ_INJECT_ASM +#define RSEQ_INJECT_ASM(n) +#endif + +#ifndef RSEQ_INJECT_C +#define RSEQ_INJECT_C(n) +#endif + +#ifndef RSEQ_INJECT_INPUT +#define RSEQ_INJECT_INPUT +#endif + +#ifndef RSEQ_INJECT_CLOBBER +#define RSEQ_INJECT_CLOBBER +#endif + +#ifndef RSEQ_INJECT_FAILED +#define RSEQ_INJECT_FAILED +#endif + +extern __thread volatile struct rseq __rseq_abi; + +#define rseq_likely(x) __builtin_expect(!!(x), 1) +#define rseq_unlikely(x) __builtin_expect(!!(x), 0) +#define rseq_barrier() __asm__ __volatile__("" : : : "memory") + +#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) +#define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); }) +#define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x) + +#define __rseq_str_1(x) #x +#define __rseq_str(x) __rseq_str_1(x) + +#define rseq_log(fmt, args...) \ + fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)"\n", \ + ## args, __func__) + +#define rseq_bug(fmt, args...) \ + do { \ + rseq_log(fmt, ##args); \ + abort(); \ + } while (0) + +#if defined(__x86_64__) || defined(__i386__) +#include <rseq-x86.h> +#elif defined(__ARMEL__) +#include <rseq-arm.h> +#elif defined(__PPC__) +#include <rseq-ppc.h> +#elif defined(__mips__) +#include <rseq-mips.h> +#else +#error unsupported target +#endif + +/* + * Register rseq for the current thread. This needs to be called once + * by any thread which uses restartable sequences, before they start + * using restartable sequences, to ensure restartable sequences + * succeed. A restartable sequence executed from a non-registered + * thread will always fail. + */ +int rseq_register_current_thread(void); + +/* + * Unregister rseq for current thread. + */ +int rseq_unregister_current_thread(void); + +/* + * Restartable sequence fallback for reading the current CPU number. + */ +int32_t rseq_fallback_current_cpu(void); + +/* + * Values returned can be either the current CPU number, -1 (rseq is + * uninitialized), or -2 (rseq initialization has failed). + */ +static inline int32_t rseq_current_cpu_raw(void) +{ + return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id); +} + +/* + * Returns a possible CPU number, which is typically the current CPU. + * The returned CPU number can be used to prepare for an rseq critical + * section, which will confirm whether the cpu number is indeed the + * current one, and whether rseq is initialized. + * + * The CPU number returned by rseq_cpu_start should always be validated + * by passing it to a rseq asm sequence, or by comparing it to the + * return value of rseq_current_cpu_raw() if the rseq asm sequence + * does not need to be invoked. + */ +static inline uint32_t rseq_cpu_start(void) +{ + return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); +} + +static inline uint32_t rseq_current_cpu(void) +{ + int32_t cpu; + + cpu = rseq_current_cpu_raw(); + if (rseq_unlikely(cpu < 0)) + cpu = rseq_fallback_current_cpu(); + return cpu; +} + +static inline void rseq_clear_rseq_cs(void) +{ +#ifdef __LP64__ + __rseq_abi.rseq_cs.ptr = 0; +#else + __rseq_abi.rseq_cs.ptr.ptr32 = 0; +#endif +} + +/* + * rseq_prepare_unload() should be invoked by each thread executing a rseq + * critical section at least once between their last critical section and + * library unload of the library defining the rseq critical section + * (struct rseq_cs). This also applies to use of rseq in code generated by + * JIT: rseq_prepare_unload() should be invoked at least once by each + * thread executing a rseq critical section before reclaim of the memory + * holding the struct rseq_cs. + */ +static inline void rseq_prepare_unload(void) +{ + rseq_clear_rseq_cs(); +} + +#endif /* RSEQ_H_ */ diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh new file mode 100755 index 000000000000..3acd6d75ff9f --- /dev/null +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ or MIT + +EXTRA_ARGS=${@} + +OLDIFS="$IFS" +IFS=$'\n' +TEST_LIST=( + "-T s" + "-T l" + "-T b" + "-T b -M" + "-T m" + "-T m -M" + "-T i" +) + +TEST_NAME=( + "spinlock" + "list" + "buffer" + "buffer with barrier" + "memcpy" + "memcpy with barrier" + "increment" +) +IFS="$OLDIFS" + +REPS=1000 +SLOW_REPS=100 + +function do_tests() +{ + local i=0 + while [ "$i" -lt "${#TEST_LIST[@]}" ]; do + echo "Running test ${TEST_NAME[$i]}" + ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1 + echo "Running compare-twice test ${TEST_NAME[$i]}" + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1 + let "i++" + done +} + +echo "Default parameters" +do_tests + +echo "Loop injection: 10000 loops" + +OLDIFS="$IFS" +IFS=$'\n' +INJECT_LIST=( + "1" + "2" + "3" + "4" + "5" + "6" + "7" + "8" + "9" +) +IFS="$OLDIFS" + +NR_LOOPS=10000 + +i=0 +while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do + echo "Injecting at <${INJECT_LIST[$i]}>" + do_tests -${INJECT_LIST[i]} ${NR_LOOPS} + let "i++" +done +NR_LOOPS= + +function inject_blocking() +{ + OLDIFS="$IFS" + IFS=$'\n' + INJECT_LIST=( + "7" + "8" + "9" + ) + IFS="$OLDIFS" + + NR_LOOPS=-1 + + i=0 + while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do + echo "Injecting at <${INJECT_LIST[$i]}>" + do_tests -${INJECT_LIST[i]} -1 ${@} + let "i++" + done + NR_LOOPS= +} + +echo "Yield injection (25%)" +inject_blocking -m 4 -y + +echo "Yield injection (50%)" +inject_blocking -m 2 -y + +echo "Yield injection (100%)" +inject_blocking -m 1 -y + +echo "Kill injection (25%)" +inject_blocking -m 4 -k + +echo "Kill injection (50%)" +inject_blocking -m 2 -k + +echo "Kill injection (100%)" +inject_blocking -m 1 -k + +echo "Sleep injection (1ms, 25%)" +inject_blocking -m 4 -s 1 + +echo "Sleep injection (1ms, 50%)" +inject_blocking -m 2 -s 1 + +echo "Sleep injection (1ms, 100%)" +inject_blocking -m 1 -s 1 diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore new file mode 100644 index 000000000000..d0ad44f6294a --- /dev/null +++ b/tools/testing/selftests/rtc/.gitignore @@ -0,0 +1,2 @@ +rtctest +setdate diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile new file mode 100644 index 000000000000..de9c8566672a --- /dev/null +++ b/tools/testing/selftests/rtc/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -O3 -Wl,-no-as-needed -Wall +LDFLAGS += -lrt -lpthread -lm + +TEST_GEN_PROGS = rtctest + +TEST_GEN_PROGS_EXTENDED = setdate + +include ../lib.mk diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c new file mode 100644 index 000000000000..e20b017e7073 --- /dev/null +++ b/tools/testing/selftests/rtc/rtctest.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Real Time Clock Driver Test Program + * + * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com> + */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/rtc.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define NUM_UIE 3 +#define ALARM_DELTA 3 + +static char *rtc_file = "/dev/rtc0"; + +FIXTURE(rtc) { + int fd; +}; + +FIXTURE_SETUP(rtc) { + self->fd = open(rtc_file, O_RDONLY); + ASSERT_NE(-1, self->fd); +} + +FIXTURE_TEARDOWN(rtc) { + close(self->fd); +} + +TEST_F(rtc, date_read) { + int rc; + struct rtc_time rtc_tm; + + /* Read the RTC time/date */ + rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm); + ASSERT_NE(-1, rc); + + TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.", + rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); +} + +TEST_F(rtc, uie_read) { + int i, rc, irq = 0; + unsigned long data; + + /* Turn on update interrupts */ + rc = ioctl(self->fd, RTC_UIE_ON, 0); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip update IRQs not supported."); + return; + } + + for (i = 0; i < NUM_UIE; i++) { + /* This read will block */ + rc = read(self->fd, &data, sizeof(data)); + ASSERT_NE(-1, rc); + irq++; + } + + EXPECT_EQ(NUM_UIE, irq); + + rc = ioctl(self->fd, RTC_UIE_OFF, 0); + ASSERT_NE(-1, rc); +} + +TEST_F(rtc, uie_select) { + int i, rc, irq = 0; + unsigned long data; + + /* Turn on update interrupts */ + rc = ioctl(self->fd, RTC_UIE_ON, 0); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip update IRQs not supported."); + return; + } + + for (i = 0; i < NUM_UIE; i++) { + struct timeval tv = { .tv_sec = 2 }; + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + /* The select will wait until an RTC interrupt happens. */ + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + ASSERT_NE(0, rc); + + /* This read won't block */ + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + irq++; + } + + EXPECT_EQ(NUM_UIE, irq); + + rc = ioctl(self->fd, RTC_UIE_OFF, 0); + ASSERT_NE(-1, rc); +} + +TEST_F(rtc, alarm_alm_set) { + struct timeval tv = { .tv_sec = ALARM_DELTA + 2 }; + unsigned long data; + struct rtc_time tm; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&tm) + ALARM_DELTA; + gmtime_r(&secs, (struct tm *)&tm); + + rc = ioctl(self->fd, RTC_ALM_SET, &tm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_ALM_READ, &tm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d:%02d:%02d.", + tm.tm_hour, tm.tm_min, tm.tm_sec); + + /* Enable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_ON, 0); + ASSERT_NE(-1, rc); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + EXPECT_NE(0, rc); + + /* Disable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_OFF, 0); + ASSERT_NE(-1, rc); + + if (rc == 0) + return; + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + TH_LOG("data: %lx", data); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +TEST_F(rtc, alarm_wkalm_set) { + struct timeval tv = { .tv_sec = ALARM_DELTA + 2 }; + struct rtc_wkalrm alarm = { 0 }; + struct rtc_time tm; + unsigned long data; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA; + gmtime_r(&secs, (struct tm *)&alarm.time); + + alarm.enabled = 1; + + rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_WKALM_RD, &alarm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.", + alarm.time.tm_mday, alarm.time.tm_mon + 1, + alarm.time.tm_year + 1900, alarm.time.tm_hour, + alarm.time.tm_min, alarm.time.tm_sec); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + EXPECT_NE(0, rc); + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +static void __attribute__((constructor)) +__constructor_order_last(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; +} + +int main(int argc, char **argv) +{ + switch (argc) { + case 2: + rtc_file = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]); + return 1; + } + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/rtc/setdate.c index 2cb78489eca4..2cb78489eca4 100644 --- a/tools/testing/selftests/timers/rtctest_setdate.c +++ b/tools/testing/selftests/rtc/setdate.c diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile new file mode 100644 index 000000000000..a19531dba4dc --- /dev/null +++ b/tools/testing/selftests/sparc64/Makefile @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-2.0 +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/) + +ifneq ($(ARCH),sparc64) +nothing: +.PHONY: all clean run_tests install +.SILENT: +else + +SUBDIRS := drivers + +TEST_PROGS := run.sh + + +.PHONY: all clean + +include ../lib.mk + +all: + @for DIR in $(SUBDIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + mkdir $$BUILD_TARGET -p; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + #SUBDIR test prog name should be in the form: SUBDIR_test.sh \ + TEST=$$DIR"_test.sh"; \ + if [ -e $$DIR/$$TEST ]; then \ + rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \ + fi \ + done + +override define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) + + @for SUBDIR in $(SUBDIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \ + mkdir $$BUILD_TARGET -p; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \ + done; +endef + +override define CLEAN + @for DIR in $(SUBDIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + mkdir $$BUILD_TARGET -p; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + done +endef +endif diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore new file mode 100644 index 000000000000..90e835ed74e6 --- /dev/null +++ b/tools/testing/selftests/sparc64/drivers/.gitignore @@ -0,0 +1 @@ +adi-test diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile new file mode 100644 index 000000000000..deb0df415565 --- /dev/null +++ b/tools/testing/selftests/sparc64/drivers/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +INCLUDEDIR := -I. +CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g + +TEST_GEN_FILES := adi-test + +all: $(TEST_GEN_FILES) + +$(TEST_GEN_FILES): adi-test.c + +TEST_PROGS := drivers_test.sh + +include ../../lib.mk + +$(OUTPUT)/adi-test: adi-test.c diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c new file mode 100644 index 000000000000..95d93c6a88a5 --- /dev/null +++ b/tools/testing/selftests/sparc64/drivers/adi-test.c @@ -0,0 +1,721 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * selftest for sparc64's privileged ADI driver + * + * Author: Tom Hromatka <tom.hromatka@oracle.com> + */ +#include <linux/kernel.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "../../kselftest.h" + +#define DEBUG_LEVEL_1_BIT (0x0001) +#define DEBUG_LEVEL_2_BIT (0x0002) +#define DEBUG_LEVEL_3_BIT (0x0004) +#define DEBUG_LEVEL_4_BIT (0x0008) +#define DEBUG_TIMING_BIT (0x1000) + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +/* bit mask of enabled bits to print */ +#define DEBUG 0x0001 + +#define DEBUG_PRINT_L1(...) debug_print(DEBUG_LEVEL_1_BIT, __VA_ARGS__) +#define DEBUG_PRINT_L2(...) debug_print(DEBUG_LEVEL_2_BIT, __VA_ARGS__) +#define DEBUG_PRINT_L3(...) debug_print(DEBUG_LEVEL_3_BIT, __VA_ARGS__) +#define DEBUG_PRINT_L4(...) debug_print(DEBUG_LEVEL_4_BIT, __VA_ARGS__) +#define DEBUG_PRINT_T(...) debug_print(DEBUG_TIMING_BIT, __VA_ARGS__) + +static void debug_print(int level, const char *s, ...) +{ + va_list args; + + va_start(args, s); + + if (DEBUG & level) + vfprintf(stdout, s, args); + va_end(args); +} + +#ifndef min +#define min(x, y) ((x) < (y) ? x : y) +#endif + +#define RETURN_FROM_TEST(_ret) \ + do { \ + DEBUG_PRINT_L1( \ + "\tTest %s returned %d\n", __func__, _ret); \ + return _ret; \ + } while (0) + +#define ADI_BLKSZ 64 +#define ADI_MAX_VERSION 15 + +#define TEST_STEP_FAILURE(_ret) \ + do { \ + fprintf(stderr, "\tTest step failure: %d at %s:%d\n", \ + _ret, __func__, __LINE__); \ + goto out; \ + } while (0) + +#define RDTICK(_x) \ + asm volatile(" rd %%tick, %0\n" : "=r" (_x)) + +static int random_version(void) +{ + long tick; + + RDTICK(tick); + + return tick % (ADI_MAX_VERSION + 1); +} + +#define MAX_RANGES_SUPPORTED 5 +static const char system_ram_str[] = "System RAM\n"; +static int range_count; +static unsigned long long int start_addr[MAX_RANGES_SUPPORTED]; +static unsigned long long int end_addr[MAX_RANGES_SUPPORTED]; + +struct stats { + char name[16]; + unsigned long total; + unsigned long count; + unsigned long bytes; +}; + +static struct stats read_stats = { + .name = "read", .total = 0, .count = 0, .bytes = 0}; +static struct stats pread_stats = { + .name = "pread", .total = 0, .count = 0, .bytes = 0}; +static struct stats write_stats = { + .name = "write", .total = 0, .count = 0, .bytes = 0}; +static struct stats pwrite_stats = { + .name = "pwrite", .total = 0, .count = 0, .bytes = 0}; +static struct stats seek_stats = { + .name = "seek", .total = 0, .count = 0, .bytes = 0}; + +static void update_stats(struct stats * const ustats, + unsigned long measurement, unsigned long bytes) +{ + ustats->total += measurement; + ustats->bytes += bytes; + ustats->count++; +} + +static void print_ustats(const struct stats * const ustats) +{ + DEBUG_PRINT_L1("%s\t%7d\t%7.0f\t%7.0f\n", + ustats->name, ustats->count, + (float)ustats->total / (float)ustats->count, + (float)ustats->bytes / (float)ustats->count); +} + +static void print_stats(void) +{ + DEBUG_PRINT_L1("\nSyscall\tCall\tAvgTime\tAvgSize\n" + "\tCount\t(ticks)\t(bytes)\n" + "-------------------------------\n"); + + print_ustats(&read_stats); + print_ustats(&pread_stats); + print_ustats(&write_stats); + print_ustats(&pwrite_stats); + print_ustats(&seek_stats); +} + +static int build_memory_map(void) +{ + char line[256]; + FILE *fp; + int i; + + range_count = 0; + + fp = fopen("/proc/iomem", "r"); + if (!fp) { + fprintf(stderr, "/proc/iomem: error %d: %s\n", + errno, strerror(errno)); + return -errno; + } + + while (fgets(line, sizeof(line), fp) != 0) { + if (strstr(line, system_ram_str)) { + char *dash, *end_ptr; + + /* Given a line like this: + * d0400000-10ffaffff : System RAM + * replace the "-" with a space + */ + dash = strstr(line, "-"); + dash[0] = 0x20; + + start_addr[range_count] = strtoull(line, &end_ptr, 16); + end_addr[range_count] = strtoull(end_ptr, NULL, 16); + range_count++; + } + } + + fclose(fp); + + DEBUG_PRINT_L1("RAM Ranges\n"); + for (i = 0; i < range_count; i++) + DEBUG_PRINT_L1("\trange %d: 0x%llx\t- 0x%llx\n", + i, start_addr[i], end_addr[i]); + + if (range_count == 0) { + fprintf(stderr, "No valid address ranges found. Error.\n"); + return -1; + } + + return 0; +} + +static int read_adi(int fd, unsigned char *buf, int buf_sz) +{ + int ret, bytes_read = 0; + long start, end, elapsed_time = 0; + + do { + RDTICK(start); + ret = read(fd, buf + bytes_read, buf_sz - bytes_read); + RDTICK(end); + if (ret < 0) + return -errno; + + elapsed_time += end - start; + update_stats(&read_stats, elapsed_time, buf_sz); + bytes_read += ret; + + } while (bytes_read < buf_sz); + + DEBUG_PRINT_T("\tread elapsed timed = %ld\n", elapsed_time); + DEBUG_PRINT_L3("\tRead %d bytes\n", bytes_read); + + return bytes_read; +} + +static int pread_adi(int fd, unsigned char *buf, + int buf_sz, unsigned long offset) +{ + int ret, i, bytes_read = 0; + unsigned long cur_offset; + long start, end, elapsed_time = 0; + + cur_offset = offset; + do { + RDTICK(start); + ret = pread(fd, buf + bytes_read, buf_sz - bytes_read, + cur_offset); + RDTICK(end); + if (ret < 0) + return -errno; + + elapsed_time += end - start; + update_stats(&pread_stats, elapsed_time, buf_sz); + bytes_read += ret; + cur_offset += ret; + + } while (bytes_read < buf_sz); + + DEBUG_PRINT_T("\tpread elapsed timed = %ld\n", elapsed_time); + DEBUG_PRINT_L3("\tRead %d bytes starting at offset 0x%lx\n", + bytes_read, offset); + for (i = 0; i < bytes_read; i++) + DEBUG_PRINT_L4("\t\t0x%lx\t%d\n", offset + i, buf[i]); + + return bytes_read; +} + +static int write_adi(int fd, const unsigned char * const buf, int buf_sz) +{ + int ret, bytes_written = 0; + long start, end, elapsed_time = 0; + + do { + RDTICK(start); + ret = write(fd, buf + bytes_written, buf_sz - bytes_written); + RDTICK(end); + if (ret < 0) + return -errno; + + elapsed_time += (end - start); + update_stats(&write_stats, elapsed_time, buf_sz); + bytes_written += ret; + } while (bytes_written < buf_sz); + + DEBUG_PRINT_T("\twrite elapsed timed = %ld\n", elapsed_time); + DEBUG_PRINT_L3("\tWrote %d of %d bytes\n", bytes_written, buf_sz); + + return bytes_written; +} + +static int pwrite_adi(int fd, const unsigned char * const buf, + int buf_sz, unsigned long offset) +{ + int ret, bytes_written = 0; + unsigned long cur_offset; + long start, end, elapsed_time = 0; + + cur_offset = offset; + + do { + RDTICK(start); + ret = pwrite(fd, buf + bytes_written, + buf_sz - bytes_written, cur_offset); + RDTICK(end); + if (ret < 0) { + fprintf(stderr, "pwrite(): error %d: %s\n", + errno, strerror(errno)); + return -errno; + } + + elapsed_time += (end - start); + update_stats(&pwrite_stats, elapsed_time, buf_sz); + bytes_written += ret; + cur_offset += ret; + + } while (bytes_written < buf_sz); + + DEBUG_PRINT_T("\tpwrite elapsed timed = %ld\n", elapsed_time); + DEBUG_PRINT_L3("\tWrote %d of %d bytes starting at address 0x%lx\n", + bytes_written, buf_sz, offset); + + return bytes_written; +} + +static off_t seek_adi(int fd, off_t offset, int whence) +{ + long start, end; + off_t ret; + + RDTICK(start); + ret = lseek(fd, offset, whence); + RDTICK(end); + DEBUG_PRINT_L2("\tlseek ret = 0x%llx\n", ret); + if (ret < 0) + goto out; + + DEBUG_PRINT_T("\tlseek elapsed timed = %ld\n", end - start); + update_stats(&seek_stats, end - start, 0); + +out: + (void)lseek(fd, 0, SEEK_END); + return ret; +} + +static int test0_prpw_aligned_1byte(int fd) +{ + /* somewhat arbitrarily chosen address */ + unsigned long paddr = + (end_addr[range_count - 1] - 0x1000) & ~(ADI_BLKSZ - 1); + unsigned char version[1], expected_version; + loff_t offset; + int ret; + + version[0] = random_version(); + expected_version = version[0]; + + offset = paddr / ADI_BLKSZ; + + ret = pwrite_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + ret = pread_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + if (expected_version != version[0]) { + DEBUG_PRINT_L2("\tExpected version %d but read version %d\n", + expected_version, version[0]); + TEST_STEP_FAILURE(-expected_version); + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +#define TEST1_VERSION_SZ 4096 +static int test1_prpw_aligned_4096bytes(int fd) +{ + /* somewhat arbitrarily chosen address */ + unsigned long paddr = + (end_addr[range_count - 1] - 0x6000) & ~(ADI_BLKSZ - 1); + unsigned char version[TEST1_VERSION_SZ], + expected_version[TEST1_VERSION_SZ]; + loff_t offset; + int ret, i; + + for (i = 0; i < TEST1_VERSION_SZ; i++) { + version[i] = random_version(); + expected_version[i] = version[i]; + } + + offset = paddr / ADI_BLKSZ; + + ret = pwrite_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + ret = pread_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + for (i = 0; i < TEST1_VERSION_SZ; i++) { + if (expected_version[i] != version[i]) { + DEBUG_PRINT_L2( + "\tExpected version %d but read version %d\n", + expected_version, version[0]); + TEST_STEP_FAILURE(-expected_version[i]); + } + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +#define TEST2_VERSION_SZ 10327 +static int test2_prpw_aligned_10327bytes(int fd) +{ + /* somewhat arbitrarily chosen address */ + unsigned long paddr = + (start_addr[0] + 0x6000) & ~(ADI_BLKSZ - 1); + unsigned char version[TEST2_VERSION_SZ], + expected_version[TEST2_VERSION_SZ]; + loff_t offset; + int ret, i; + + for (i = 0; i < TEST2_VERSION_SZ; i++) { + version[i] = random_version(); + expected_version[i] = version[i]; + } + + offset = paddr / ADI_BLKSZ; + + ret = pwrite_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + ret = pread_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + for (i = 0; i < TEST2_VERSION_SZ; i++) { + if (expected_version[i] != version[i]) { + DEBUG_PRINT_L2( + "\tExpected version %d but read version %d\n", + expected_version, version[0]); + TEST_STEP_FAILURE(-expected_version[i]); + } + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +#define TEST3_VERSION_SZ 12541 +static int test3_prpw_unaligned_12541bytes(int fd) +{ + /* somewhat arbitrarily chosen address */ + unsigned long paddr = + ((start_addr[0] + 0xC000) & ~(ADI_BLKSZ - 1)) + 17; + unsigned char version[TEST3_VERSION_SZ], + expected_version[TEST3_VERSION_SZ]; + loff_t offset; + int ret, i; + + for (i = 0; i < TEST3_VERSION_SZ; i++) { + version[i] = random_version(); + expected_version[i] = version[i]; + } + + offset = paddr / ADI_BLKSZ; + + ret = pwrite_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + ret = pread_adi(fd, version, sizeof(version), offset); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + for (i = 0; i < TEST3_VERSION_SZ; i++) { + if (expected_version[i] != version[i]) { + DEBUG_PRINT_L2( + "\tExpected version %d but read version %d\n", + expected_version, version[0]); + TEST_STEP_FAILURE(-expected_version[i]); + } + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +static int test4_lseek(int fd) +{ +#define OFFSET_ADD (0x100) +#define OFFSET_SUBTRACT (0xFFFFFFF000000000) + + off_t offset_out, offset_in; + int ret; + + + offset_in = 0x123456789abcdef0; + offset_out = seek_adi(fd, offset_in, SEEK_SET); + if (offset_out != offset_in) { + ret = -1; + TEST_STEP_FAILURE(ret); + } + + /* seek to the current offset. this should return EINVAL */ + offset_out = seek_adi(fd, offset_in, SEEK_SET); + if (offset_out < 0 && errno == EINVAL) + DEBUG_PRINT_L2( + "\tSEEK_SET failed as designed. Not an error\n"); + else { + ret = -2; + TEST_STEP_FAILURE(ret); + } + + offset_out = seek_adi(fd, 0, SEEK_CUR); + if (offset_out != offset_in) { + ret = -3; + TEST_STEP_FAILURE(ret); + } + + offset_out = seek_adi(fd, OFFSET_ADD, SEEK_CUR); + if (offset_out != (offset_in + OFFSET_ADD)) { + ret = -4; + TEST_STEP_FAILURE(ret); + } + + offset_out = seek_adi(fd, OFFSET_SUBTRACT, SEEK_CUR); + if (offset_out != (offset_in + OFFSET_ADD + OFFSET_SUBTRACT)) { + ret = -5; + TEST_STEP_FAILURE(ret); + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +static int test5_rw_aligned_1byte(int fd) +{ + /* somewhat arbitrarily chosen address */ + unsigned long paddr = + (end_addr[range_count - 1] - 0xF000) & ~(ADI_BLKSZ - 1); + unsigned char version, expected_version; + loff_t offset; + off_t oret; + int ret; + + offset = paddr / ADI_BLKSZ; + version = expected_version = random_version(); + + oret = seek_adi(fd, offset, SEEK_SET); + if (oret != offset) { + ret = -1; + TEST_STEP_FAILURE(ret); + } + + ret = write_adi(fd, &version, sizeof(version)); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + oret = seek_adi(fd, offset, SEEK_SET); + if (oret != offset) { + ret = -1; + TEST_STEP_FAILURE(ret); + } + + ret = read_adi(fd, &version, sizeof(version)); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + if (expected_version != version) { + DEBUG_PRINT_L2("\tExpected version %d but read version %d\n", + expected_version, version); + TEST_STEP_FAILURE(-expected_version); + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +#define TEST6_VERSION_SZ 9434 +static int test6_rw_aligned_9434bytes(int fd) +{ + /* somewhat arbitrarily chosen address */ + unsigned long paddr = + (end_addr[range_count - 1] - 0x5F000) & ~(ADI_BLKSZ - 1); + unsigned char version[TEST6_VERSION_SZ], + expected_version[TEST6_VERSION_SZ]; + loff_t offset; + off_t oret; + int ret, i; + + offset = paddr / ADI_BLKSZ; + for (i = 0; i < TEST6_VERSION_SZ; i++) + version[i] = expected_version[i] = random_version(); + + oret = seek_adi(fd, offset, SEEK_SET); + if (oret != offset) { + ret = -1; + TEST_STEP_FAILURE(ret); + } + + ret = write_adi(fd, version, sizeof(version)); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + memset(version, 0, TEST6_VERSION_SZ); + + oret = seek_adi(fd, offset, SEEK_SET); + if (oret != offset) { + ret = -1; + TEST_STEP_FAILURE(ret); + } + + ret = read_adi(fd, version, sizeof(version)); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + for (i = 0; i < TEST6_VERSION_SZ; i++) { + if (expected_version[i] != version[i]) { + DEBUG_PRINT_L2( + "\tExpected version %d but read version %d\n", + expected_version[i], version[i]); + TEST_STEP_FAILURE(-expected_version[i]); + } + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +#define TEST7_VERSION_SZ 14963 +static int test7_rw_aligned_14963bytes(int fd) +{ + /* somewhat arbitrarily chosen address */ + unsigned long paddr = + ((start_addr[range_count - 1] + 0xF000) & ~(ADI_BLKSZ - 1)) + 39; + unsigned char version[TEST7_VERSION_SZ], + expected_version[TEST7_VERSION_SZ]; + loff_t offset; + off_t oret; + int ret, i; + + offset = paddr / ADI_BLKSZ; + for (i = 0; i < TEST7_VERSION_SZ; i++) { + version[i] = random_version(); + expected_version[i] = version[i]; + } + + oret = seek_adi(fd, offset, SEEK_SET); + if (oret != offset) { + ret = -1; + TEST_STEP_FAILURE(ret); + } + + ret = write_adi(fd, version, sizeof(version)); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + memset(version, 0, TEST7_VERSION_SZ); + + oret = seek_adi(fd, offset, SEEK_SET); + if (oret != offset) { + ret = -1; + TEST_STEP_FAILURE(ret); + } + + ret = read_adi(fd, version, sizeof(version)); + if (ret != sizeof(version)) + TEST_STEP_FAILURE(ret); + + for (i = 0; i < TEST7_VERSION_SZ; i++) { + if (expected_version[i] != version[i]) { + DEBUG_PRINT_L2( + "\tExpected version %d but read version %d\n", + expected_version[i], version[i]); + TEST_STEP_FAILURE(-expected_version[i]); + } + + paddr += ADI_BLKSZ; + } + + ret = 0; +out: + RETURN_FROM_TEST(ret); +} + +static int (*tests[])(int fd) = { + test0_prpw_aligned_1byte, + test1_prpw_aligned_4096bytes, + test2_prpw_aligned_10327bytes, + test3_prpw_unaligned_12541bytes, + test4_lseek, + test5_rw_aligned_1byte, + test6_rw_aligned_9434bytes, + test7_rw_aligned_14963bytes, +}; +#define TEST_COUNT ARRAY_SIZE(tests) + +int main(int argc, char *argv[]) +{ + int fd, ret, test; + + ret = build_memory_map(); + if (ret < 0) + return ret; + + fd = open("/dev/adi", O_RDWR); + if (fd < 0) { + fprintf(stderr, "open: error %d: %s\n", + errno, strerror(errno)); + return -errno; + } + + for (test = 0; test < TEST_COUNT; test++) { + DEBUG_PRINT_L1("Running test #%d\n", test); + + ret = (*tests[test])(fd); + if (ret != 0) + ksft_test_result_fail("Test #%d failed: error %d\n", + test, ret); + else + ksft_test_result_pass("Test #%d passed\n", test); + } + + print_stats(); + close(fd); + + if (ksft_get_fail_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); + + /* it's impossible to get here, but the compiler throws a warning + * about control reaching the end of non-void function. bah. + */ + return 0; +} diff --git a/tools/testing/selftests/sparc64/drivers/drivers_test.sh b/tools/testing/selftests/sparc64/drivers/drivers_test.sh new file mode 100755 index 000000000000..6d08273b7532 --- /dev/null +++ b/tools/testing/selftests/sparc64/drivers/drivers_test.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +SRC_TREE=../../../../ + +test_run() +{ + if [ -f ${SRC_TREE}/drivers/char/adi.ko ]; then + insmod ${SRC_TREE}/drivers/char/adi.ko 2> /dev/null + if [ $? -ne 0 ]; then + rc=1 + fi + else + # Use modprobe dry run to check for missing adi module + if ! /sbin/modprobe -q -n adi; then + echo "adi: [SKIP]" + elif /sbin/modprobe -q adi; then + echo "adi: ok" + else + echo "adi: [FAIL]" + rc=1 + fi + fi + ./adi-test + rmmod adi 2> /dev/null +} + +rc=0 +test_run +exit $rc diff --git a/tools/testing/selftests/sparc64/run.sh b/tools/testing/selftests/sparc64/run.sh new file mode 100755 index 000000000000..38ad61f9328e --- /dev/null +++ b/tools/testing/selftests/sparc64/run.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +(cd drivers; ./drivers_test.sh) diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh index 24cff498b31a..fc9f8cde7d42 100755 --- a/tools/testing/selftests/static_keys/test_static_keys.sh +++ b/tools/testing/selftests/static_keys/test_static_keys.sh @@ -2,6 +2,19 @@ # SPDX-License-Identifier: GPL-2.0 # Runs static keys kernel module tests +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_static_key_base; then + echo "static_key: module test_static_key_base is not found [SKIP]" + exit $ksft_skip +fi + +if ! /sbin/modprobe -q -n test_static_keys; then + echo "static_key: module test_static_keys is not found [SKIP]" + exit $ksft_skip +fi + if /sbin/modprobe -q test_static_key_base; then if /sbin/modprobe -q test_static_keys; then echo "static_key: ok" diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config new file mode 100644 index 000000000000..1ab7e8130db2 --- /dev/null +++ b/tools/testing/selftests/sync/config @@ -0,0 +1,4 @@ +CONFIG_STAGING=y +CONFIG_ANDROID=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index ec232c3cfcaa..584eb8ea780a 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -14,6 +14,9 @@ # This performs a series tests against the proc sysctl interface. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + TEST_NAME="sysctl" TEST_DRIVER="test_${TEST_NAME}" TEST_DIR=$(dirname $0) @@ -41,7 +44,7 @@ test_modprobe() echo "$0: $DIR not present" >&2 echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 - exit 1 + exit $ksft_skip fi } @@ -98,28 +101,30 @@ test_reqs() uid=$(id -u) if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi if ! which perl 2> /dev/null > /dev/null; then echo "$0: You need perl installed" - exit 1 + exit $ksft_skip fi if ! which getconf 2> /dev/null > /dev/null; then echo "$0: You need getconf installed" - exit 1 + exit $ksft_skip fi if ! which diff 2> /dev/null > /dev/null; then echo "$0: You need diff installed" - exit 1 + exit $ksft_skip fi } function load_req_mod() { - trap "test_modprobe" EXIT - if [ ! -d $DIR ]; then + if ! modprobe -q -n $TEST_DRIVER; then + echo "$0: module $TEST_DRIVER not found [SKIP]" + exit $ksft_skip + fi modprobe $TEST_DRIVER if [ $? -ne 0 ]; then exit @@ -765,6 +770,7 @@ function parse_args() test_reqs allow_user_defaults check_production_sysctl_writes_strict +test_modprobe load_req_mod trap "test_finish" EXIT diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json index 93cf8fea8ae7..3a2f51fc7fd4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -398,13 +398,83 @@ 255 ] ], - "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args", - "expExitCode": "255", + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", "verifyCmd": "$TC actions ls action csum", "matchPattern": "^[ \t]+index [0-9]* ref", "matchCount": "32", "teardown": [ "$TC actions flush action csum" ] + }, + { + "id": "b4e9", + "name": "Delete batch of 32 csum actions", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ], + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"" + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action csum", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "0015", + "name": "Add batch of 32 csum tcp actions with large cookies", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action csum", + "matchPattern": "^[ \t]+index [0-9]* ref", + "matchCount": "32", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "989e", + "name": "Delete batch of 32 csum actions with large cookies", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ], + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"" + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action csum", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 9f34f0753969..637ea0219617 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -1,7 +1,7 @@ [ { - "id": "a568", - "name": "Add action with ife type", + "id": "7682", + "name": "Create valid ife encode action with mark and pass control", "category": [ "actions", "ife" @@ -12,21 +12,400 @@ 0, 1, 255 - ], - "$TC actions add action ife encode type 0xDEAD index 1" + ] ], - "cmdUnderTest": "$TC actions get action ife index 1", + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "ef47", + "name": "Create valid ife encode action with mark and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "df43", + "name": "Create valid ife encode action with mark and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "e4cf", + "name": "Create valid ife encode action with mark and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "ccba", + "name": "Create valid ife encode action with mark and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "a1cf", + "name": "Create valid ife encode action with mark and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "cb3d", + "name": "Create valid ife encode action with mark value at 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "1efb", + "name": "Create ife encode action with mark value exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90", + "matchCount": "0", + "teardown": [] + }, + { + "id": "95ed", + "name": "Create valid ife encode action with prio and pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "aa17", + "name": "Create valid ife encode action with prio and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "74c7", + "name": "Create valid ife encode action with prio and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7a97", + "name": "Create valid ife encode action with prio and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "f66b", + "name": "Create valid ife encode action with prio and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "3056", + "name": "Create valid ife encode action with prio and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7dd3", + "name": "Create valid ife encode action with prio value at 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 99", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "2ca1", + "name": "Create ife encode action with prio value exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 99", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99", + "matchCount": "0", + "teardown": [] + }, + { + "id": "05bb", + "name": "Create valid ife encode action with tcindex and pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xDEAD", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" ] }, { - "id": "b983", - "name": "Add action without ife type", + "id": "ce65", + "name": "Create valid ife encode action with tcindex and pipe control", "category": [ "actions", "ife" @@ -37,16 +416,649 @@ 0, 1, 255 - ], - "$TC actions add action ife encode index 1" + ] ], - "cmdUnderTest": "$TC actions get action ife index 1", + "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xED3E", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" ] + }, + { + "id": "09cd", + "name": "Create valid ife encode action with tcindex and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "8eb5", + "name": "Create valid ife encode action with tcindex and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "451a", + "name": "Create valid ife encode action with tcindex and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d76c", + "name": "Create valid ife encode action with tcindex and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "e731", + "name": "Create valid ife encode action with tcindex and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "b7b8", + "name": "Create valid ife encode action with tcindex value at 16-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d0d8", + "name": "Create ife encode action with tcindex value exceeding 16-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1", + "matchCount": "0", + "teardown": [] + }, + { + "id": "2a9c", + "name": "Create valid ife encode action with mac src parameter", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "cf5c", + "name": "Create valid ife encode action with mac dst parameter", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "2353", + "name": "Create valid ife encode action with mac src and mac dst parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 11", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "552c", + "name": "Create valid ife encode action with mark and type parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "0421", + "name": "Create valid ife encode action with prio and type parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 21", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "4017", + "name": "Create valid ife encode action with tcindex and type parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 21", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "fac3", + "name": "Create valid ife encode action with index at 32-bit maximnum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 4294967295", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7c25", + "name": "Create valid ife decode action with pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "dccb", + "name": "Create valid ife decode action with pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7bb9", + "name": "Create valid ife decode action with continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d9ad", + "name": "Create valid ife decode action with drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode drop index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "219f", + "name": "Create valid ife decode action with reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "8f44", + "name": "Create valid ife decode action with jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "56cf", + "name": "Create ife encode action with index exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4294967295999", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ee94", + "name": "Create ife encode action with invalid control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "b330", + "name": "Create ife encode action with cookie", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "bbc0", + "name": "Create ife encode action with invalid argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "d54a", + "name": "Create ife encode action with invalid type argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "7ee0", + "name": "Create ife encode action with invalid mac src argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "0a7d", + "name": "Create ife encode action with invalid mac dst argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index 443c9b3c8664..6e4edfae1799 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -340,7 +340,7 @@ }, { "id": "8b69", - "name": "Add mirred mirror action with maximum index", + "name": "Add mirred mirror action with index at 32-bit maximum", "category": [ "actions", "mirred" @@ -363,6 +363,28 @@ ] }, { + "id": "3f66", + "name": "Add mirred mirror action with index exceeding 32-bit maximum", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 429496729555", + "expExitCode": "255", + "verifyCmd": "$TC actions get action mirred index 429496729555", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 429496729555", + "matchCount": "0", + "teardown": [] + }, + { "id": "a70e", "name": "Delete mirred mirror action", "category": [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 38d85a1d7492..f03763d81617 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -401,11 +401,11 @@ ], "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295", "expExitCode": "0", - "verifyCmd": "$TC actions get action mirred index 4294967295", + "verifyCmd": "$TC actions get action police index 4294967295", "matchPattern": "action order [0-9]*: police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb", "matchCount": "1", "teardown": [ - "$TC actions flush action mirred" + "$TC actions flush action police" ] }, { diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json new file mode 100644 index 000000000000..3aca33c00039 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -0,0 +1,588 @@ +[ + { + "id": "9784", + "name": "Add valid sample action with mandatory arguments", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "5c91", + "name": "Add valid sample action with mandatory arguments and continue control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "334b", + "name": "Add valid sample action with mandatory arguments and drop control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "da69", + "name": "Add valid sample action with mandatory arguments and reclassify control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "13ce", + "name": "Add valid sample action with mandatory arguments and pipe control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "1886", + "name": "Add valid sample action with mandatory arguments and jump control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 200", + "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "b6d4", + "name": "Add sample action with mandatory arguments and invalid control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "a874", + "name": "Add invalid sample action without mandatory arguments", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ac01", + "name": "Add invalid sample action without mandatory argument rate", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample group 10 index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "4203", + "name": "Add invalid sample action without mandatory argument group", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 100 index 10", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "14a7", + "name": "Add invalid sample action without mandatory argument group", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 100 index 10", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "8f2e", + "name": "Add valid sample action with trunc argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "45f8", + "name": "Add sample action with maximum rate argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "ad0c", + "name": "Add sample action with maximum trunc argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "83a9", + "name": "Add sample action with maximum group argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 1", + "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "ed27", + "name": "Add sample action with invalid rate argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "2eae", + "name": "Add sample action with invalid group argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 1", + "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "6ff3", + "name": "Add sample action with invalid trunc size", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 11", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "2b2a", + "name": "Add sample action with invalid index", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 5294967299", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "dee2", + "name": "Add sample action with maximum allowed index", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 4294967295", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "560e", + "name": "Add sample action with cookie", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 45", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "704a", + "name": "Replace existing sample action with new rate argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 index 4" + ], + "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "60eb", + "name": "Replace existing sample action with new group argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 index 4" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "2cce", + "name": "Replace existing sample action with new trunc argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 trunc 48 index 4" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "59d1", + "name": "Replace existing sample action with new control argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 reclassify index 4" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json index 4510ddfa6e54..69ea09eefffc 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -1,7 +1,7 @@ [ { "id": "6f5a", - "name": "Add vlan pop action", + "name": "Add vlan pop action with pipe opcode", "category": [ "actions", "vlan" @@ -14,18 +14,18 @@ 255 ] ], - "cmdUnderTest": "$TC actions add action vlan pop index 8", + "cmdUnderTest": "$TC actions add action vlan pop pipe index 8", "expExitCode": "0", "verifyCmd": "$TC actions list action vlan", - "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref", + "matchPattern": "action order [0-9]+: vlan.*pop.*pipe.*index 8 ref", "matchCount": "1", "teardown": [ "$TC actions flush action vlan" ] }, { - "id": "ee6f", - "name": "Add vlan pop action with large index", + "id": "df35", + "name": "Add vlan pop action with pass opcode", "category": [ "actions", "vlan" @@ -38,10 +38,82 @@ 255 ] ], - "cmdUnderTest": "$TC actions add action vlan pop index 4294967295", + "cmdUnderTest": "$TC actions add action vlan pop pass index 8", "expExitCode": "0", - "verifyCmd": "$TC actions list action vlan", - "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref", + "verifyCmd": "$TC actions get action vlan index 8", + "matchPattern": "action order [0-9]+: vlan.*pop.*pass.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "b0d4", + "name": "Add vlan pop action with drop opcode", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop drop index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 8", + "matchPattern": "action order [0-9]+: vlan.*pop.*drop.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "95ee", + "name": "Add vlan pop action with reclassify opcode", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop reclassify index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 8", + "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "0283", + "name": "Add vlan pop action with continue opcode", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop continue index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 8", + "matchPattern": "action order [0-9]+: vlan.*pop.*continue.*index 8 ref", "matchCount": "1", "teardown": [ "$TC actions flush action vlan" @@ -96,6 +168,74 @@ ] }, { + "id": "a178", + "name": "Add vlan pop action with invalid opcode", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop foo index 8", + "expExitCode": "255", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*pop.*foo.*index 8 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ee6f", + "name": "Add vlan pop action with index at 32-bit maximum", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "0dfa", + "name": "Add vlan pop action with index exceeding 32-bit maximum", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop reclassify index 429496729599", + "expExitCode": "255", + "verifyCmd": "$TC actions get action vlan index 429496729599", + "matchPattern": "action order [0-9]+: vlan.*pop.reclassify.*index 429496729599", + "matchCount": "0", + "teardown": [] + }, + { "id": "2b91", "name": "Add vlan invalid action", "category": [ @@ -115,13 +255,11 @@ "verifyCmd": "$TC actions list action vlan", "matchPattern": "action order [0-9]+: vlan.*bad_mode", "matchCount": "0", - "teardown": [ - "$TC actions flush action vlan" - ] + "teardown": [] }, { "id": "57fc", - "name": "Add vlan action with invalid protocol type", + "name": "Add vlan push action with invalid protocol type", "category": [ "actions", "vlan" @@ -139,9 +277,7 @@ "verifyCmd": "$TC actions list action vlan", "matchPattern": "action order [0-9]+: vlan.*push", "matchCount": "0", - "teardown": [ - "$TC actions flush action vlan" - ] + "teardown": [] }, { "id": "3989", @@ -216,6 +352,30 @@ ] }, { + "id": "1f4b", + "name": "Add vlan push action with maximum 12-bit vlan ID", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 4094 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 1", + "matchPattern": "action order [0-9]+: vlan.*push id 4094.*protocol 802.1Q.*priority 0.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { "id": "1f7b", "name": "Add vlan push action with invalid vlan ID", "category": [ @@ -240,6 +400,30 @@ ] }, { + "id": "fe40", + "name": "Add vlan push action with maximum 3-bit IEEE 802.1p priority", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 4 priority 7 reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 1", + "matchPattern": "action order [0-9]+: vlan.*push id 4.*protocol 802.1Q.*priority 7.*reclassify.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { "id": "5d02", "name": "Add vlan push action with invalid IEEE 802.1p priority", "category": [ @@ -259,9 +443,7 @@ "verifyCmd": "$TC actions list action vlan", "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref", "matchCount": "0", - "teardown": [ - "$TC actions flush action vlan" - ] + "teardown": [] }, { "id": "6812", @@ -312,6 +494,106 @@ ] }, { + "id": "3deb", + "name": "Replace existing vlan push action with new ID", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 500 pipe index 12" + ], + "cmdUnderTest": "$TC actions replace action vlan push id 700 pipe index 12", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 12", + "matchPattern": "action order [0-9]+: vlan.*push id 700 protocol 802.1Q priority 0 pipe.*index 12 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "9e76", + "name": "Replace existing vlan push action with new protocol", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 1 protocol 802.1Q pipe index 1" + ], + "cmdUnderTest": "$TC actions replace action vlan push id 1 protocol 802.1ad pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 1", + "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1ad priority 0 pipe.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "ede4", + "name": "Replace existing vlan push action with new priority", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 1 protocol 802.1Q priority 3 reclassify index 1" + ], + "cmdUnderTest": "$TC actions replace action vlan push id 1 priority 4 reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 1", + "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1Q priority 4 reclassify.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "d413", + "name": "Replace existing vlan pop action with new cookie", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan pop continue index 1 cookie 22334455" + ], + "cmdUnderTest": "$TC actions replace action vlan pop continue index 1 cookie a1b1c2d1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 1", + "matchPattern": "action order [0-9]+: vlan.*pop continue.*index 1 ref.*cookie a1b1c2d1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { "id": "83a4", "name": "Delete vlan pop action", "category": [ @@ -385,7 +667,7 @@ }, { "id": "1d78", - "name": "Add vlan action with cookie", + "name": "Add vlan push action with cookie", "category": [ "actions", "vlan" diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore index 2c8ac8416299..32a9eadb2d4e 100644 --- a/tools/testing/selftests/timers/.gitignore +++ b/tools/testing/selftests/timers/.gitignore @@ -9,7 +9,7 @@ nanosleep nsleep-lat posix_timers raw_skew -rtctest +rtcpie set-2038 set-tai set-timer-lat @@ -19,4 +19,3 @@ valid-adjtimex adjtick set-tz freq-step -rtctest_setdate diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 3496680981f2..c02683cfb6c9 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -5,13 +5,13 @@ LDFLAGS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ - inconsistency-check raw_skew threadtest rtctest + inconsistency-check raw_skew threadtest rtcpie DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ skew_consistency clocksource-switch freq-step leap-a-day \ leapcrash set-tai set-2038 set-tz -TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate +TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) include ../lib.mk diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c new file mode 100644 index 000000000000..47b5bad1b393 --- /dev/null +++ b/tools/testing/selftests/timers/rtcpie.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Real Time Clock Periodic Interrupt test program + * + * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for + * events"), PIE are completely handled using hrtimers, without actually using + * any underlying hardware RTC. + * + */ + +#include <stdio.h> +#include <linux/rtc.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> + +/* + * This expects the new RTC class driver framework, working with + * clocks that will often not be clones of what the PC-AT had. + * Use the command line to specify another RTC if you need one. + */ +static const char default_rtc[] = "/dev/rtc0"; + +int main(int argc, char **argv) +{ + int i, fd, retval, irqcount = 0; + unsigned long tmp, data, old_pie_rate; + const char *rtc = default_rtc; + struct timeval start, end, diff; + + switch (argc) { + case 2: + rtc = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: rtctest [rtcdev] [d]\n"); + return 1; + } + + fd = open(rtc, O_RDONLY); + + if (fd == -1) { + perror(rtc); + exit(errno); + } + + /* Read periodic IRQ rate */ + retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate); + if (retval == -1) { + /* not all RTCs support periodic IRQs */ + if (errno == EINVAL) { + fprintf(stderr, "\nNo periodic IRQ support\n"); + goto done; + } + perror("RTC_IRQP_READ ioctl"); + exit(errno); + } + fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate); + + fprintf(stderr, "Counting 20 interrupts at:"); + fflush(stderr); + + /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ + for (tmp=2; tmp<=64; tmp*=2) { + + retval = ioctl(fd, RTC_IRQP_SET, tmp); + if (retval == -1) { + /* not all RTCs can change their periodic IRQ rate */ + if (errno == EINVAL) { + fprintf(stderr, + "\n...Periodic IRQ rate is fixed\n"); + goto done; + } + perror("RTC_IRQP_SET ioctl"); + exit(errno); + } + + fprintf(stderr, "\n%ldHz:\t", tmp); + fflush(stderr); + + /* Enable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_ON, 0); + if (retval == -1) { + perror("RTC_PIE_ON ioctl"); + exit(errno); + } + + for (i=1; i<21; i++) { + gettimeofday(&start, NULL); + /* This blocks */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + if (diff.tv_sec > 0 || + diff.tv_usec > ((1000000L / tmp) * 1.10)) { + fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", + diff.tv_sec, diff.tv_usec, + (1000000L / tmp)); + fflush(stdout); + exit(-1); + } + + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Disable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_OFF, 0); + if (retval == -1) { + perror("RTC_PIE_OFF ioctl"); + exit(errno); + } + } + +done: + ioctl(fd, RTC_IRQP_SET, old_pie_rate); + + fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); + + close(fd); + + return 0; +} diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c deleted file mode 100644 index 411eff625e66..000000000000 --- a/tools/testing/selftests/timers/rtctest.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Real Time Clock Driver Test/Example Program - * - * Compile with: - * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest - * - * Copyright (C) 1996, Paul Gortmaker. - * - * Released under the GNU General Public License, version 2, - * included herein by reference. - * - */ - -#include <stdio.h> -#include <linux/rtc.h> -#include <sys/ioctl.h> -#include <sys/time.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <errno.h> - -#ifndef ARRAY_SIZE -# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -/* - * This expects the new RTC class driver framework, working with - * clocks that will often not be clones of what the PC-AT had. - * Use the command line to specify another RTC if you need one. - */ -static const char default_rtc[] = "/dev/rtc0"; - -static struct rtc_time cutoff_dates[] = { - { - .tm_year = 70, /* 1970 -1900 */ - .tm_mday = 1, - }, - /* signed time_t 19/01/2038 3:14:08 */ - { - .tm_year = 138, - .tm_mday = 19, - }, - { - .tm_year = 138, - .tm_mday = 20, - }, - { - .tm_year = 199, /* 2099 -1900 */ - .tm_mday = 1, - }, - { - .tm_year = 200, /* 2100 -1900 */ - .tm_mday = 1, - }, - /* unsigned time_t 07/02/2106 7:28:15*/ - { - .tm_year = 205, - .tm_mon = 1, - .tm_mday = 7, - }, - { - .tm_year = 206, - .tm_mon = 1, - .tm_mday = 8, - }, - /* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/ - { - .tm_year = 362, - .tm_mon = 3, - .tm_mday = 12, - }, - { - .tm_year = 362, /* 2262 -1900 */ - .tm_mon = 3, - .tm_mday = 13, - }, -}; - -static int compare_dates(struct rtc_time *a, struct rtc_time *b) -{ - if (a->tm_year != b->tm_year || - a->tm_mon != b->tm_mon || - a->tm_mday != b->tm_mday || - a->tm_hour != b->tm_hour || - a->tm_min != b->tm_min || - ((b->tm_sec - a->tm_sec) > 1)) - return 1; - - return 0; -} - -int main(int argc, char **argv) -{ - int i, fd, retval, irqcount = 0, dangerous = 0; - unsigned long tmp, data; - struct rtc_time rtc_tm; - const char *rtc = default_rtc; - struct timeval start, end, diff; - - switch (argc) { - case 3: - if (*argv[2] == 'd') - dangerous = 1; - case 2: - rtc = argv[1]; - /* FALLTHROUGH */ - case 1: - break; - default: - fprintf(stderr, "usage: rtctest [rtcdev] [d]\n"); - return 1; - } - - fd = open(rtc, O_RDONLY); - - if (fd == -1) { - perror(rtc); - exit(errno); - } - - fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n"); - - /* Turn on update interrupts (one per second) */ - retval = ioctl(fd, RTC_UIE_ON, 0); - if (retval == -1) { - if (errno == EINVAL) { - fprintf(stderr, - "\n...Update IRQs not supported.\n"); - goto test_READ; - } - perror("RTC_UIE_ON ioctl"); - exit(errno); - } - - fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:", - rtc); - fflush(stderr); - for (i=1; i<6; i++) { - /* This read will block */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:"); - fflush(stderr); - for (i=1; i<6; i++) { - struct timeval tv = {5, 0}; /* 5 second timeout on select */ - fd_set readfds; - - FD_ZERO(&readfds); - FD_SET(fd, &readfds); - /* The select will wait until an RTC interrupt happens. */ - retval = select(fd+1, &readfds, NULL, NULL, &tv); - if (retval == -1) { - perror("select"); - exit(errno); - } - /* This read won't block unlike the select-less case above. */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - /* Turn off update interrupts */ - retval = ioctl(fd, RTC_UIE_OFF, 0); - if (retval == -1) { - perror("RTC_UIE_OFF ioctl"); - exit(errno); - } - -test_READ: - /* Read the RTC time/date */ - retval = ioctl(fd, RTC_RD_TIME, &rtc_tm); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", - rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, - rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); - - /* Set the alarm to 5 sec in the future, and check for rollover */ - rtc_tm.tm_sec += 5; - if (rtc_tm.tm_sec >= 60) { - rtc_tm.tm_sec %= 60; - rtc_tm.tm_min++; - } - if (rtc_tm.tm_min == 60) { - rtc_tm.tm_min = 0; - rtc_tm.tm_hour++; - } - if (rtc_tm.tm_hour == 24) - rtc_tm.tm_hour = 0; - - retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); - if (retval == -1) { - if (errno == EINVAL) { - fprintf(stderr, - "\n...Alarm IRQs not supported.\n"); - goto test_PIE; - } - - perror("RTC_ALM_SET ioctl"); - exit(errno); - } - - /* Read the current alarm settings */ - retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); - if (retval == -1) { - if (errno == EINVAL) { - fprintf(stderr, - "\n...EINVAL reading current alarm setting.\n"); - goto test_PIE; - } - perror("RTC_ALM_READ ioctl"); - exit(errno); - } - - fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n", - rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); - - /* Enable alarm interrupts */ - retval = ioctl(fd, RTC_AIE_ON, 0); - if (retval == -1) { - if (errno == EINVAL || errno == EIO) { - fprintf(stderr, - "\n...Alarm IRQs not supported.\n"); - goto test_PIE; - } - - perror("RTC_AIE_ON ioctl"); - exit(errno); - } - - fprintf(stderr, "Waiting 5 seconds for alarm..."); - fflush(stderr); - /* This blocks until the alarm ring causes an interrupt */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - irqcount++; - fprintf(stderr, " okay. Alarm rang.\n"); - - /* Disable alarm interrupts */ - retval = ioctl(fd, RTC_AIE_OFF, 0); - if (retval == -1) { - perror("RTC_AIE_OFF ioctl"); - exit(errno); - } - -test_PIE: - /* Read periodic IRQ rate */ - retval = ioctl(fd, RTC_IRQP_READ, &tmp); - if (retval == -1) { - /* not all RTCs support periodic IRQs */ - if (errno == EINVAL) { - fprintf(stderr, "\nNo periodic IRQ support\n"); - goto test_DATE; - } - perror("RTC_IRQP_READ ioctl"); - exit(errno); - } - fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp); - - fprintf(stderr, "Counting 20 interrupts at:"); - fflush(stderr); - - /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ - for (tmp=2; tmp<=64; tmp*=2) { - - retval = ioctl(fd, RTC_IRQP_SET, tmp); - if (retval == -1) { - /* not all RTCs can change their periodic IRQ rate */ - if (errno == EINVAL) { - fprintf(stderr, - "\n...Periodic IRQ rate is fixed\n"); - goto test_DATE; - } - perror("RTC_IRQP_SET ioctl"); - exit(errno); - } - - fprintf(stderr, "\n%ldHz:\t", tmp); - fflush(stderr); - - /* Enable periodic interrupts */ - retval = ioctl(fd, RTC_PIE_ON, 0); - if (retval == -1) { - perror("RTC_PIE_ON ioctl"); - exit(errno); - } - - for (i=1; i<21; i++) { - gettimeofday(&start, NULL); - /* This blocks */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - gettimeofday(&end, NULL); - timersub(&end, &start, &diff); - if (diff.tv_sec > 0 || - diff.tv_usec > ((1000000L / tmp) * 1.10)) { - fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", - diff.tv_sec, diff.tv_usec, - (1000000L / tmp)); - fflush(stdout); - exit(-1); - } - - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - /* Disable periodic interrupts */ - retval = ioctl(fd, RTC_PIE_OFF, 0); - if (retval == -1) { - perror("RTC_PIE_OFF ioctl"); - exit(errno); - } - } - -test_DATE: - if (!dangerous) - goto done; - - fprintf(stderr, "\nTesting problematic dates\n"); - - for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) { - struct rtc_time current; - - /* Write the new date in RTC */ - retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]); - if (retval == -1) { - perror("RTC_SET_TIME ioctl"); - close(fd); - exit(errno); - } - - /* Read back */ - retval = ioctl(fd, RTC_RD_TIME, ¤t); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - if(compare_dates(&cutoff_dates[i], ¤t)) { - fprintf(stderr,"Setting date %d failed\n", - cutoff_dates[i].tm_year + 1900); - goto done; - } - - cutoff_dates[i].tm_sec += 5; - - /* Write the new alarm in RTC */ - retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]); - if (retval == -1) { - perror("RTC_ALM_SET ioctl"); - close(fd); - exit(errno); - } - - /* Read back */ - retval = ioctl(fd, RTC_ALM_READ, ¤t); - if (retval == -1) { - perror("RTC_ALM_READ ioctl"); - exit(errno); - } - - if(compare_dates(&cutoff_dates[i], ¤t)) { - fprintf(stderr,"Setting alarm %d failed\n", - cutoff_dates[i].tm_year + 1900); - goto done; - } - - fprintf(stderr, "Setting year %d is OK \n", - cutoff_dates[i].tm_year + 1900); - } -done: - fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); - - close(fd); - - return 0; -} diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile new file mode 100644 index 000000000000..f7baa9aa2932 --- /dev/null +++ b/tools/testing/selftests/uevent/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +all: + +include ../lib.mk + +.PHONY: all clean + +BINARIES := uevent_filtering +CFLAGS += -Wl,-no-as-needed -Wall + +uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h + $(CC) $(CFLAGS) $< -o $@ + +TEST_PROGS += $(BINARIES) +EXTRA_CLEAN := $(BINARIES) + +all: $(BINARIES) diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config new file mode 100644 index 000000000000..1038f4515be8 --- /dev/null +++ b/tools/testing/selftests/uevent/config @@ -0,0 +1,2 @@ +CONFIG_USER_NS=y +CONFIG_NET=y diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c new file mode 100644 index 000000000000..f83391aa42cf --- /dev/null +++ b/tools/testing/selftests/uevent/uevent_filtering.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/netlink.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sched.h> +#include <sys/eventfd.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../kselftest.h" +#include "../kselftest_harness.h" + +#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent" +#define __UEVENT_BUFFER_SIZE (2048 * 2) +#define __UEVENT_HEADER "add@/devices/virtual/mem/full" +#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full") +#define __UEVENT_LISTEN_ALL -1 + +ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + +again: + ret = read(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (ret != pid) + goto again; + + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + return -1; + + return 0; +} + +static int uevent_listener(unsigned long post_flags, bool expect_uevent, + int sync_fd) +{ + int sk_fd, ret; + socklen_t sk_addr_len; + int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE; + uint64_t sync_add = 1; + struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 }; + char buf[__UEVENT_BUFFER_SIZE] = { 0 }; + struct iovec iov = { buf, __UEVENT_BUFFER_SIZE }; + char control[CMSG_SPACE(sizeof(struct ucred))]; + struct msghdr hdr = { + &rcv_addr, sizeof(rcv_addr), &iov, 1, + control, sizeof(control), 0, + }; + + sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, + NETLINK_KOBJECT_UEVENT); + if (sk_fd < 0) { + fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno)); + return -1; + } + + ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, + sizeof(rcv_buf_sz)); + if (ret < 0) { + fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno)); + goto on_error; + } + + sk_addr.nl_family = AF_NETLINK; + sk_addr.nl_groups = __UEVENT_LISTEN_ALL; + + sk_addr_len = sizeof(sk_addr); + ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len); + if (ret < 0) { + fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno)); + goto on_error; + } + + ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len); + if (ret < 0) { + fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno)); + goto on_error; + } + + if ((size_t)sk_addr_len != sizeof(sk_addr)) { + fprintf(stderr, "Invalid socket address size\n"); + goto on_error; + } + + if (post_flags & CLONE_NEWUSER) { + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare user namespace\n", + strerror(errno)); + goto on_error; + } + } + + if (post_flags & CLONE_NEWNET) { + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare network namespace\n", + strerror(errno)); + goto on_error; + } + } + + ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add)); + close(sync_fd); + if (ret != sizeof(sync_add)) { + fprintf(stderr, "Failed to synchronize with parent process\n"); + goto on_error; + } + + fret = 0; + for (;;) { + ssize_t r; + + r = recvmsg(sk_fd, &hdr, 0); + if (r <= 0) { + fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno)); + ret = -1; + break; + } + + /* ignore libudev messages */ + if (memcmp(buf, "libudev", 8) == 0) + continue; + + /* ignore uevents we didn't trigger */ + if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0) + continue; + + if (!expect_uevent) { + fprintf(stderr, "Received unexpected uevent:\n"); + ret = -1; + } + + if (TH_LOG_ENABLED) { + /* If logging is enabled dump the received uevent. */ + (void)write_nointr(STDERR_FILENO, buf, r); + (void)write_nointr(STDERR_FILENO, "\n", 1); + } + + break; + } + +on_error: + close(sk_fd); + + return fret; +} + +int trigger_uevent(unsigned int times) +{ + int fd, ret; + unsigned int i; + + fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) + return -EINVAL; + + return -1; + } + + for (i = 0; i < times; i++) { + ret = write_nointr(fd, "add\n", sizeof("add\n") - 1); + if (ret < 0) { + fprintf(stderr, "Failed to trigger uevent\n"); + break; + } + } + close(fd); + + return ret; +} + +int set_death_signal(void) +{ + int ret; + pid_t ppid; + + ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + + /* Check whether we have been orphaned. */ + ppid = getppid(); + if (ppid == 1) { + pid_t self; + + self = getpid(); + ret = kill(self, SIGKILL); + } + + if (ret < 0) + return -1; + + return 0; +} + +static int do_test(unsigned long pre_flags, unsigned long post_flags, + bool expect_uevent, int sync_fd) +{ + int ret; + uint64_t wait_val; + pid_t pid; + sigset_t mask; + sigset_t orig_mask; + struct timespec timeout; + + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + + ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask); + if (ret < 0) { + fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno)); + return -1; + } + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno)); + return -1; + } + + if (pid == 0) { + /* Make sure that we go away when our parent dies. */ + ret = set_death_signal(); + if (ret < 0) { + fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n"); + _exit(EXIT_FAILURE); + } + + if (pre_flags & CLONE_NEWUSER) { + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare user namespace\n", + strerror(errno)); + _exit(EXIT_FAILURE); + } + } + + if (pre_flags & CLONE_NEWNET) { + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare network namespace\n", + strerror(errno)); + _exit(EXIT_FAILURE); + } + } + + if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val)); + if (ret != sizeof(wait_val)) { + fprintf(stderr, "Failed to synchronize with child process\n"); + _exit(EXIT_FAILURE); + } + + /* Trigger 10 uevents to account for the case where the kernel might + * drop some. + */ + ret = trigger_uevent(10); + if (ret < 0) + fprintf(stderr, "Failed triggering uevents\n"); + + /* Wait for 2 seconds before considering this failed. This should be + * plenty of time for the kernel to deliver the uevent even under heavy + * load. + */ + timeout.tv_sec = 2; + timeout.tv_nsec = 0; + +again: + ret = sigtimedwait(&mask, NULL, &timeout); + if (ret < 0) { + if (errno == EINTR) + goto again; + + if (!expect_uevent) + ret = kill(pid, SIGTERM); /* success */ + else + ret = kill(pid, SIGUSR1); /* error */ + if (ret < 0) + return -1; + } + + ret = wait_for_pid(pid); + if (ret < 0) + return -1; + + return ret; +} + +static void signal_handler(int sig) +{ + if (sig == SIGTERM) + _exit(EXIT_SUCCESS); + + _exit(EXIT_FAILURE); +} + +TEST(uevent_filtering) +{ + int ret, sync_fd; + struct sigaction act; + + if (geteuid()) { + TH_LOG("Uevent filtering tests require root privileges. Skipping test"); + _exit(KSFT_SKIP); + } + + ret = access(__DEV_FULL, F_OK); + EXPECT_EQ(0, ret) { + if (errno == ENOENT) { + TH_LOG(__DEV_FULL " does not exist. Skipping test"); + _exit(KSFT_SKIP); + } + + _exit(KSFT_FAIL); + } + + act.sa_handler = signal_handler; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + ret = sigaction(SIGTERM, &act, NULL); + ASSERT_EQ(0, ret); + + sync_fd = eventfd(0, EFD_CLOEXEC); + ASSERT_GE(sync_fd, 0); + + /* + * Setup: + * - Open uevent listening socket in initial network namespace owned by + * initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in non-initial network namespace + * owned by initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(CLONE_NEWNET, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - unshare user namespace + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(CLONE_NEWUSER, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in non-initial network namespace + * owned by non-initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives no uevent + */ + ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare network namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWNET, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare user namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWUSER, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare user namespace + * - unshare network namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + +do_cleanup: + close(sync_fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh index d60506fc77f8..f9b31a57439b 100755 --- a/tools/testing/selftests/user/test_user_copy.sh +++ b/tools/testing/selftests/user/test_user_copy.sh @@ -2,6 +2,13 @@ # SPDX-License-Identifier: GPL-2.0 # Runs copy_to/from_user infrastructure using test_user_copy kernel module +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_user_copy; then + echo "user: module test_user_copy is not found [SKIP]" + exit $ksft_skip +fi if /sbin/modprobe -q test_user_copy; then /sbin/modprobe -q -r test_user_copy echo "user_copy: ok" diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index 1097f04e4d80..bcec71250873 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -16,6 +16,8 @@ #include <unistd.h> #include <string.h> +#include "../kselftest.h" + #define MAP_SIZE 1048576 struct map_list { @@ -169,7 +171,7 @@ int main(int argc, char **argv) printf("Either the sysctl compact_unevictable_allowed is not\n" "set to 1 or couldn't read the proc file.\n" "Skipping the test\n"); - return 0; + return KSFT_SKIP; } lim.rlim_cur = RLIM_INFINITY; diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c index 4997b9222cfa..637b6d0ac0d0 100644 --- a/tools/testing/selftests/vm/mlock2-tests.c +++ b/tools/testing/selftests/vm/mlock2-tests.c @@ -9,6 +9,8 @@ #include <stdbool.h> #include "mlock2.h" +#include "../kselftest.h" + struct vm_boundaries { unsigned long start; unsigned long end; @@ -303,7 +305,7 @@ static int test_mlock_lock() if (mlock2_(map, 2 * page_size, 0)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(0)"); goto unmap; @@ -412,7 +414,7 @@ static int test_mlock_onfault() if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(MLOCK_ONFAULT)"); goto unmap; @@ -425,7 +427,7 @@ static int test_mlock_onfault() if (munlock(map, 2 * page_size)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("munlock()"); goto unmap; @@ -457,7 +459,7 @@ static int test_lock_onfault_of_present() if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(MLOCK_ONFAULT)"); goto unmap; @@ -583,7 +585,7 @@ static int test_vma_management(bool call_mlock) if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock(ONFAULT)\n"); goto out; diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 22d564673830..88cbe5575f0c 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 #please run as root +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + mnt=./huge exitcode=0 @@ -36,7 +39,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages if [ $? -ne 0 ]; then echo "Please run this test as root" - exit 1 + exit $ksft_skip fi while read name size unit; do if [ "$name" = "HugePages_Free:" ]; then diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index de2f9ec8a87f..7b8171e3128a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -69,6 +69,8 @@ #include <setjmp.h> #include <stdbool.h> +#include "../kselftest.h" + #ifdef __NR_userfaultfd static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; @@ -1322,7 +1324,7 @@ int main(int argc, char **argv) int main(void) { printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); - return 0; + return KSFT_SKIP; } #endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 39f66bc29b82..186520198de7 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -8,6 +8,7 @@ include ../lib.mk UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ @@ -31,7 +32,12 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall + +# call32_from_64 in thunks.S uses absolute addresses. +ifeq ($(CAN_BUILD_WITH_NOPIE),1) +CFLAGS += -no-pie +endif define gen-target-rule-32 $(1) $(1)_32: $(OUTPUT)/$(1)_32 diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 246145b84a12..4d9dc3f2fd70 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -610,21 +610,41 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) */ for (int i = 0; i < NGREG; i++) { greg_t req = requested_regs[i], res = resulting_regs[i]; + if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */ - if (i == REG_SP) { - printf("\tSP: %llx -> %llx\n", (unsigned long long)req, - (unsigned long long)res); + if (i == REG_SP) { /* - * In many circumstances, the high 32 bits of rsp - * are zeroed. For example, we could be a real - * 32-bit program, or we could hit any of a number - * of poorly-documented IRET or segmented ESP - * oddities. If this happens, it's okay. + * If we were using a 16-bit stack segment, then + * the kernel is a bit stuck: IRET only restores + * the low 16 bits of ESP/RSP if SS is 16-bit. + * The kernel uses a hack to restore bits 31:16, + * but that hack doesn't help with bits 63:32. + * On Intel CPUs, bits 63:32 end up zeroed, and, on + * AMD CPUs, they leak the high bits of the kernel + * espfix64 stack pointer. There's very little that + * the kernel can do about it. + * + * Similarly, if we are returning to a 32-bit context, + * the CPU will often lose the high 32 bits of RSP. */ - if (res == (req & 0xFFFFFFFF)) - continue; /* OK; not expected to work */ + + if (res == req) + continue; + + if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { + printf("[NOTE]\tSP: %llx -> %llx\n", + (unsigned long long)req, + (unsigned long long)res); + continue; + } + + printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; } bool ignore_reg = false; @@ -654,25 +674,18 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) #endif /* Sanity check on the kernel */ - if (i == REG_CX && requested_regs[i] != resulting_regs[i]) { + if (i == REG_CX && req != res) { printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", - (unsigned long long)requested_regs[i], - (unsigned long long)resulting_regs[i]); + (unsigned long long)req, + (unsigned long long)res); nerrs++; continue; } - if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { - /* - * SP is particularly interesting here. The - * usual cause of failures is that we hit the - * nasty IRET case of returning to a 16-bit SS, - * in which case bits 16:31 of the *kernel* - * stack pointer persist in ESP. - */ + if (req != res && !ignore_reg) { printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", - i, (unsigned long long)requested_regs[i], - (unsigned long long)resulting_regs[i]); + i, (unsigned long long)req, + (unsigned long long)res); nerrs++; } } diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c new file mode 100644 index 000000000000..46a447163b93 --- /dev/null +++ b/tools/testing/selftests/x86/trivial_program.c @@ -0,0 +1,10 @@ +/* Trivial program to check that compilation with certain flags is working. */ + +#include <stdio.h> + +int +main(void) +{ + puts(""); + return 0; +} diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 754de7da426a..232e958ec454 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./zram_lib.sh run_zram () { @@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then else echo "$TCID : No zram.ko module or /dev/zram0 device file not found" echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + exit $ksft_skip fi diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index f6a9c73e7a44..9e73a4fb9b0a 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -18,6 +18,9 @@ MODULE=0 dev_makeswap=-1 dev_mounted=-1 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + trap INT check_prereqs() @@ -27,7 +30,7 @@ check_prereqs() if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index c9c81614a66a..4204359c9fee 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -135,11 +135,11 @@ static int refresh_imported_device_list(void) return 0; } -static int get_nports(void) +static int get_nports(struct udev_device *hc_device) { const char *attr_nports; - attr_nports = udev_device_get_sysattr_value(vhci_driver->hc_device, "nports"); + attr_nports = udev_device_get_sysattr_value(hc_device, "nports"); if (!attr_nports) { err("udev_device_get_sysattr_value nports failed"); return -1; @@ -242,35 +242,41 @@ static int read_record(int rhport, char *host, unsigned long host_len, int usbip_vhci_driver_open(void) { + int nports; + struct udev_device *hc_device; + udev_context = udev_new(); if (!udev_context) { err("udev_new failed"); return -1; } - vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver)); - /* will be freed in usbip_driver_close() */ - vhci_driver->hc_device = + hc_device = udev_device_new_from_subsystem_sysname(udev_context, USBIP_VHCI_BUS_TYPE, USBIP_VHCI_DEVICE_NAME); - if (!vhci_driver->hc_device) { + if (!hc_device) { err("udev_device_new_from_subsystem_sysname failed"); goto err; } - vhci_driver->nports = get_nports(); - dbg("available ports: %d", vhci_driver->nports); - - if (vhci_driver->nports <= 0) { + nports = get_nports(hc_device); + if (nports <= 0) { err("no available ports"); goto err; - } else if (vhci_driver->nports > MAXNPORT) { - err("port number exceeds %d", MAXNPORT); + } + dbg("available ports: %d", nports); + + vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver) + + nports * sizeof(struct usbip_imported_device)); + if (!vhci_driver) { + err("vhci_driver allocation failed"); goto err; } + vhci_driver->nports = nports; + vhci_driver->hc_device = hc_device; vhci_driver->ncontrollers = get_ncontrollers(); dbg("available controllers: %d", vhci_driver->ncontrollers); @@ -285,7 +291,7 @@ int usbip_vhci_driver_open(void) return 0; err: - udev_device_unref(vhci_driver->hc_device); + udev_device_unref(hc_device); if (vhci_driver) free(vhci_driver); diff --git a/tools/usb/usbip/libsrc/vhci_driver.h b/tools/usb/usbip/libsrc/vhci_driver.h index 418b404d5121..6c9aca216705 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.h +++ b/tools/usb/usbip/libsrc/vhci_driver.h @@ -13,7 +13,6 @@ #define USBIP_VHCI_BUS_TYPE "platform" #define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0" -#define MAXNPORT 128 enum hub_speed { HUB_SPEED_HIGH = 0, @@ -41,7 +40,7 @@ struct usbip_vhci_driver { int ncontrollers; int nports; - struct usbip_imported_device idev[MAXNPORT]; + struct usbip_imported_device idev[]; }; diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c index 9db9d21bb2ec..777f7286a0c5 100644 --- a/tools/usb/usbip/src/usbip_detach.c +++ b/tools/usb/usbip/src/usbip_detach.c @@ -43,9 +43,12 @@ void usbip_detach_usage(void) static int detach_port(char *port) { - int ret; + int ret = 0; uint8_t portnum; char path[PATH_MAX+1]; + int i; + struct usbip_imported_device *idev; + int found = 0; unsigned int port_len = strlen(port); @@ -55,27 +58,48 @@ static int detach_port(char *port) return -1; } - /* check max port */ - portnum = atoi(port); - /* remove the port state file */ + ret = usbip_vhci_driver_open(); + if (ret < 0) { + err("open vhci_driver"); + return -1; + } + + /* check for invalid port */ + for (i = 0; i < vhci_driver->nports; i++) { + idev = &vhci_driver->idev[i]; + + if (idev->port == portnum) { + found = 1; + if (idev->status != VDEV_ST_NULL) + break; + info("Port %d is already detached!\n", idev->port); + goto call_driver_close; + } + } + if (!found) { + err("Invalid port %s > maxports %d", + port, vhci_driver->nports); + goto call_driver_close; + } + + /* remove the port state file */ snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", portnum); remove(path); rmdir(VHCI_STATE_PATH); - ret = usbip_vhci_driver_open(); + ret = usbip_vhci_detach_device(portnum); if (ret < 0) { - err("open vhci_driver"); - return -1; + ret = -1; + err("Port %d detach request failed!\n", portnum); + goto call_driver_close; } + info("Port %d is now detached!\n", portnum); - ret = usbip_vhci_detach_device(portnum); - if (ret < 0) - return -1; - +call_driver_close: usbip_vhci_driver_close(); return ret; diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 1571e24e9494..f91aeb5fe571 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -6,8 +6,6 @@ # error Virtio userspace code does not support CONFIG_HAS_DMA #endif -#define PCI_DMA_BUS_IS_PHYS 1 - enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h index 9a45f90e2d08..369ee308b668 100644 --- a/tools/virtio/linux/scatterlist.h +++ b/tools/virtio/linux/scatterlist.h @@ -36,7 +36,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) */ BUG_ON((unsigned long) page & 0x03); #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; @@ -67,7 +66,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page, static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~0x3); @@ -116,9 +114,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, **/ static inline void sg_mark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif /* * Set termination bit, clear potential chain bit */ @@ -136,17 +131,11 @@ static inline void sg_mark_end(struct scatterlist *sg) **/ static inline void sg_unmark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif sg->page_link &= ~0x02; } static inline struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -160,13 +149,6 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg) static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) { memset(sgl, 0, sizeof(*sgl) * nents); -#ifdef CONFIG_DEBUG_SG - { - unsigned int i; - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; - } -#endif sg_mark_end(&sgl[nents - 1]); } diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index a8783f48f77f..cce853dca691 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -131,6 +131,7 @@ static const char * const page_flag_names[] = { [KPF_KSM] = "x:ksm", [KPF_THP] = "t:thp", [KPF_BALLOON] = "o:balloon", + [KPF_PGTABLE] = "g:pgtable", [KPF_ZERO_PAGE] = "z:zero_page", [KPF_IDLE] = "i:idle_page", |