diff options
Diffstat (limited to 'tools')
220 files changed, 14246 insertions, 4234 deletions
diff --git a/tools/Makefile b/tools/Makefile index db2f7b8ebed5..c074e42fd92f 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -24,6 +24,7 @@ help: @echo ' intel-speed-select - Intel Speed Select tool' @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' leds - LEDs tools' + @echo ' nolibc - nolibc headers testing and installation' @echo ' objtool - an ELF object analysis tool' @echo ' pci - PCI tools' @echo ' perf - Linux performance measurement and analysis tool' @@ -31,6 +32,9 @@ help: @echo ' bootconfig - boot config tool' @echo ' spi - spi tools' @echo ' tmon - thermal monitoring and tuning tool' + @echo ' thermometer - temperature capture tool' + @echo ' thermal-engine - thermal monitoring tool' + @echo ' thermal - thermal library' @echo ' tracing - misc tracing tools' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @@ -74,6 +78,9 @@ bpf/%: FORCE libapi: FORCE $(call descend,lib/api) +nolibc_%: FORCE + $(call descend,include/nolibc,$(patsubst nolibc_%,%,$@)) + # The perf build does not follow the descend function setup, # invoking it via it's own make rule. PERF_O = $(if $(O),$(O)/tools/perf,) @@ -85,12 +92,21 @@ perf: FORCE selftests: FORCE $(call descend,testing/$@) +thermal: FORCE + $(call descend,lib/$@) + turbostat x86_energy_perf_policy intel-speed-select: FORCE $(call descend,power/x86/$@) tmon: FORCE $(call descend,thermal/$@) +thermometer: FORCE + $(call descend,thermal/$@) + +thermal-engine: FORCE thermal + $(call descend,thermal/$@) + freefall: FORCE $(call descend,laptop/$@) @@ -101,7 +117,7 @@ all: acpi cgroup counter cpupower gpio hv firewire \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ - pci debugging tracing + pci debugging tracing thermal thermometer thermal-engine acpi_install: $(call descend,power/$(@:_install=),install) @@ -115,12 +131,21 @@ cgroup_install counter_install firewire_install gpio_install hv_install iio_inst selftests_install: $(call descend,testing/$(@:_install=),install) +thermal_install: + $(call descend,lib/$(@:_install=),install) + turbostat_install x86_energy_perf_policy_install intel-speed-select_install: $(call descend,power/x86/$(@:_install=),install) tmon_install: $(call descend,thermal/$(@:_install=),install) +thermometer_install: + $(call descend,thermal/$(@:_install=),install) + +thermal-engine_install: + $(call descend,thermal/$(@:_install=),install) + freefall_install: $(call descend,laptop/$(@:_install=),install) @@ -133,7 +158,7 @@ install: acpi_install cgroup_install counter_install cpupower_install gpio_insta virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ wmi_install pci_install debugging_install intel-speed-select_install \ - tracing_install + tracing_install thermometer_install thermal-engine_install acpi_clean: $(call descend,power/acpi,clean) @@ -160,9 +185,18 @@ perf_clean: selftests_clean: $(call descend,testing/$(@:_clean=),clean) +thermal_clean: + $(call descend,lib/thermal,clean) + turbostat_clean x86_energy_perf_policy_clean intel-speed-select_clean: $(call descend,power/x86/$(@:_clean=),clean) +thermometer_clean: + $(call descend,thermal/thermometer,clean) + +thermal-engine_clean: + $(call descend,thermal/thermal-engine,clean) + tmon_clean: $(call descend,thermal/tmon,clean) @@ -177,6 +211,6 @@ clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_cl vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ - intel-speed-select_clean tracing_clean + intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean .PHONY: FORCE diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h index 174e7d83fcbd..765e9e752d03 100644 --- a/tools/arch/x86/include/asm/amd-ibs.h +++ b/tools/arch/x86/include/asm/amd-ibs.h @@ -49,7 +49,7 @@ union ibs_op_ctl { }; }; -/* MSR 0xc0011035: IBS Op Data 2 */ +/* MSR 0xc0011035: IBS Op Data 1 */ union ibs_op_data { __u64 val; struct { diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 1231d63f836d..1ae0fab7d902 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -10,12 +10,6 @@ * cpu_feature_enabled(). */ -#ifdef CONFIG_X86_SMAP -# define DISABLE_SMAP 0 -#else -# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31)) -#endif - #ifdef CONFIG_X86_UMIP # define DISABLE_UMIP 0 #else @@ -80,7 +74,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) +#define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index ae61f464043a..c6a48d0ef9ff 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -98,6 +98,7 @@ FEATURE_TESTS_EXTRA := \ llvm-version \ clang \ libbpf \ + libbpf-btf__load_from_kernel_by_id \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index de66e1cc0734..cb4a2a4fa2e4 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -57,6 +57,7 @@ FILES= \ test-lzma.bin \ test-bpf.bin \ test-libbpf.bin \ + test-libbpf-btf__load_from_kernel_by_id.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -287,6 +288,9 @@ $(OUTPUT)test-bpf.bin: $(OUTPUT)test-libbpf.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c new file mode 100644 index 000000000000..f7c084428735 --- /dev/null +++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/libbpf.h> + +int main(void) +{ + return btf__load_from_kernel_by_id(20151128, NULL); +} diff --git a/tools/certs/print-cert-tbs-hash.sh b/tools/certs/print-cert-tbs-hash.sh new file mode 100755 index 000000000000..c93df5387ec9 --- /dev/null +++ b/tools/certs/print-cert-tbs-hash.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright © 2020, Microsoft Corporation. All rights reserved. +# +# Author: Mickaël Salaün <mic@linux.microsoft.com> +# +# Compute and print the To Be Signed (TBS) hash of a certificate. This is used +# as description of keys in the blacklist keyring to identify certificates. +# This output should be redirected, without newline, in a file (hash0.txt) and +# signed to create a PKCS#7 file (hash0.p7s). Both of these files can then be +# loaded in the kernel with. +# +# Exemple on a workstation: +# ./print-cert-tbs-hash.sh certificate-to-invalidate.pem > hash0.txt +# openssl smime -sign -in hash0.txt -inkey builtin-private-key.pem \ +# -signer builtin-certificate.pem -certfile certificate-chain.pem \ +# -noattr -binary -outform DER -out hash0.p7s +# +# Exemple on a managed system: +# keyctl padd blacklist "$(< hash0.txt)" %:.blacklist < hash0.p7s + +set -u -e -o pipefail + +CERT="${1:-}" +BASENAME="$(basename -- "${BASH_SOURCE[0]}")" + +if [ $# -ne 1 ] || [ ! -f "${CERT}" ]; then + echo "usage: ${BASENAME} <certificate>" >&2 + exit 1 +fi + +# Checks that it is indeed a certificate (PEM or DER encoded) and exclude the +# optional PEM text header. +if ! PEM="$(openssl x509 -inform DER -in "${CERT}" 2>/dev/null || openssl x509 -in "${CERT}")"; then + echo "ERROR: Failed to parse certificate" >&2 + exit 1 +fi + +# TBSCertificate starts at the second entry. +# Cf. https://tools.ietf.org/html/rfc3280#section-4.1 +# +# Exemple of first lines printed by openssl asn1parse: +# 0:d=0 hl=4 l= 763 cons: SEQUENCE +# 4:d=1 hl=4 l= 483 cons: SEQUENCE +# 8:d=2 hl=2 l= 3 cons: cont [ 0 ] +# 10:d=3 hl=2 l= 1 prim: INTEGER :02 +# 13:d=2 hl=2 l= 20 prim: INTEGER :3CEB2CB8818D968AC00EEFE195F0DF9665328B7B +# 35:d=2 hl=2 l= 13 cons: SEQUENCE +# 37:d=3 hl=2 l= 9 prim: OBJECT :sha256WithRSAEncryption +RANGE_AND_DIGEST_RE=' +2s/^\s*\([0-9]\+\):d=\s*[0-9]\+\s\+hl=\s*[0-9]\+\s\+l=\s*\([0-9]\+\)\s\+cons:\s*SEQUENCE\s*$/\1 \2/p; +7s/^\s*[0-9]\+:d=\s*[0-9]\+\s\+hl=\s*[0-9]\+\s\+l=\s*[0-9]\+\s\+prim:\s*OBJECT\s*:\(.*\)$/\1/p; +' + +RANGE_AND_DIGEST=($(echo "${PEM}" | \ + openssl asn1parse -in - | \ + sed -n -e "${RANGE_AND_DIGEST_RE}")) + +if [ "${#RANGE_AND_DIGEST[@]}" != 3 ]; then + echo "ERROR: Failed to parse TBSCertificate." >&2 + exit 1 +fi + +OFFSET="${RANGE_AND_DIGEST[0]}" +END="$(( OFFSET + RANGE_AND_DIGEST[1] ))" +DIGEST="${RANGE_AND_DIGEST[2]}" + +# The signature hash algorithm is used by Linux to blacklist certificates. +# Cf. crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo() +DIGEST_MATCH="" +while read -r DIGEST_ITEM; do + if [ -z "${DIGEST_ITEM}" ]; then + break + fi + if echo "${DIGEST}" | grep -qiF "${DIGEST_ITEM}"; then + DIGEST_MATCH="${DIGEST_ITEM}" + break + fi +done < <(openssl list -digest-commands | tr ' ' '\n' | sort -ur) + +if [ -z "${DIGEST_MATCH}" ]; then + echo "ERROR: Unknown digest algorithm: ${DIGEST}" >&2 + exit 1 +fi + +echo "${PEM}" | \ + openssl x509 -in - -outform DER | \ + dd "bs=1" "skip=${OFFSET}" "count=${END}" "status=none" | \ + openssl dgst "-${DIGEST_MATCH}" - | \ + awk '{printf "tbs:" $2}' diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 586d35720f13..6491fa8fba6d 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -38,7 +38,9 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 -#ifdef CONFIG_STACK_VALIDATION +#ifdef CONFIG_OBJTOOL + +#include <asm/asm.h> #ifndef __ASSEMBLY__ @@ -137,7 +139,7 @@ struct unwind_hint { .macro STACK_FRAME_NON_STANDARD func:req .pushsection .discard.func_stack_frame_non_standard, "aw" - .long \func - . + _ASM_PTR \func .popsection .endm @@ -157,7 +159,7 @@ struct unwind_hint { #endif /* __ASSEMBLY__ */ -#else /* !CONFIG_STACK_VALIDATION */ +#else /* !CONFIG_OBJTOOL */ #ifndef __ASSEMBLY__ @@ -179,6 +181,6 @@ struct unwind_hint { .endm #endif -#endif /* CONFIG_STACK_VALIDATION */ +#endif /* CONFIG_OBJTOOL */ #endif /* _LINUX_OBJTOOL_H */ diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile new file mode 100644 index 000000000000..7a16d917c185 --- /dev/null +++ b/tools/include/nolibc/Makefile @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for nolibc installation and tests +include ../../scripts/Makefile.include + +# we're in ".../tools/include/nolibc" +ifeq ($(srctree),) +srctree := $(patsubst %/tools/include/,%,$(dir $(CURDIR))) +endif + +nolibc_arch := $(patsubst arm64,aarch64,$(ARCH)) +arch_file := arch-$(nolibc_arch).h +all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \ + sys.h time.h types.h unistd.h + +# install all headers needed to support a bare-metal compiler +all: + +# Note: when ARCH is "x86" we concatenate both x86_64 and i386 +headers: + $(Q)mkdir -p $(OUTPUT)sysroot + $(Q)mkdir -p $(OUTPUT)sysroot/include + $(Q)cp $(all_files) $(OUTPUT)sysroot/include/ + $(Q)if [ "$(ARCH)" = "x86" ]; then \ + sed -e \ + 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ + arch-x86_64.h; \ + sed -e \ + 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \ + arch-i386.h; \ + elif [ -e "$(arch_file)" ]; then \ + cat $(arch_file); \ + else \ + echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \ + exit 1; \ + fi > $(OUTPUT)sysroot/include/arch.h + +headers_standalone: headers + $(Q)$(MAKE) -C $(srctree) headers + $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)/sysroot + +clean: + $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot" diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h new file mode 100644 index 000000000000..f68baf8f395f --- /dev/null +++ b/tools/include/nolibc/arch-aarch64.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * AARCH64 specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_AARCH64_H +#define _NOLIBC_ARCH_AARCH64_H + +/* O_* macros for fcntl/open are architecture-specific */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the newfstatat() syscall. Differs slightly from the + * x86_64's stat one by field ordering, so be careful. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + + long st_blocks; + long st_atime; + unsigned long st_atime_nsec; + long st_mtime; + + unsigned long st_mtime_nsec; + long st_ctime; + unsigned long st_ctime_nsec; + unsigned int __unused[2]; +}; + +/* Syscalls for AARCH64 : + * - registers are 64-bit + * - stack is 16-byte aligned + * - syscall number is passed in x8 + * - arguments are in x0, x1, x2, x3, x4, x5 + * - the system call is performed by calling svc 0 + * - syscall return comes in x0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * + * On aarch64, select() is not implemented so we have to use pselect6(). + */ +#define __ARCH_WANT_SYS_PSELECT6 + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0"); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + register long _arg4 __asm__ ("x3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + register long _arg4 __asm__ ("x3") = (long)(arg4); \ + register long _arg5 __asm__ ("x4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + register long _arg4 __asm__ ("x3") = (long)(arg4); \ + register long _arg5 __asm__ ("x4") = (long)(arg5); \ + register long _arg6 __asm__ ("x5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +__asm__ (".section .text\n" + ".weak _start\n" + "_start:\n" + "ldr x0, [sp]\n" // argc (x0) was in the stack + "add x1, sp, 8\n" // argv (x1) = sp + "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... + "add x2, x2, 8\n" // + 8 (skip null) + "add x2, x2, x1\n" // + argv + "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee + "bl main\n" // main() returns the status code, we'll exit with it. + "mov x8, 93\n" // NR_exit == 93 + "svc #0\n" + ""); + +#endif // _NOLIBC_ARCH_AARCH64_H diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h new file mode 100644 index 000000000000..f31be8e967d6 --- /dev/null +++ b/tools/include/nolibc/arch-arm.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * ARM specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_ARM_H +#define _NOLIBC_ARCH_ARM_H + +/* O_* macros for fcntl/open are architecture-specific */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). In big endian, the format + * differs as devices are returned as short only. + */ +struct sys_stat_struct { +#if defined(__ARMEB__) + unsigned short st_dev; + unsigned short __pad1; +#else + unsigned long st_dev; +#endif + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + +#if defined(__ARMEB__) + unsigned short st_rdev; + unsigned short __pad2; +#else + unsigned long st_rdev; +#endif + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +/* Syscalls for ARM in ARM or Thumb modes : + * - registers are 32-bit + * - stack is 8-byte aligned + * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) + * - syscall number is passed in r7 + * - arguments are in r0, r1, r2, r3, r4, r5 + * - the system call is performed by calling svc #0 + * - syscall return comes in r0. + * - only lr is clobbered. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, ARM supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("r7") = (num); \ + register long _arg1 __asm__ ("r0"); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("r7") = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("r7") = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("r7") = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + register long _arg3 __asm__ ("r2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("r7") = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + register long _arg3 __asm__ ("r2") = (long)(arg3); \ + register long _arg4 __asm__ ("r3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("r7") = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + register long _arg3 __asm__ ("r2") = (long)(arg3); \ + register long _arg4 __asm__ ("r3") = (long)(arg4); \ + register long _arg5 __asm__ ("r4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +/* startup code */ +__asm__ (".section .text\n" + ".weak _start\n" + "_start:\n" +#if defined(__THUMBEB__) || defined(__THUMBEL__) + /* We enter here in 32-bit mode but if some previous functions were in + * 16-bit mode, the assembler cannot know, so we need to tell it we're in + * 32-bit now, then switch to 16-bit (is there a better way to do it than + * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that + * it generates correct instructions. Note that we do not support thumb1. + */ + ".code 32\n" + "add r0, pc, #1\n" + "bx r0\n" + ".code 16\n" +#endif + "pop {%r0}\n" // argc was in the stack + "mov %r1, %sp\n" // argv = sp + "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... + "add %r2, %r2, $4\n" // ... + 4 + "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the + "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) + "bl main\n" // main() returns the status code, we'll exit with it. + "movs r7, $1\n" // NR_exit == 1 + "svc $0x00\n" + ""); + +#endif // _NOLIBC_ARCH_ARM_H diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h new file mode 100644 index 000000000000..d7e7212346e2 --- /dev/null +++ b/tools/include/nolibc/arch-i386.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * i386 specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_I386_H +#define _NOLIBC_ARCH_I386_H + +/* O_* macros for fcntl/open are architecture-specific */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + register long _arg5 __asm__ ("edi") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _eax = (long)(num); \ + long _arg6 = (long)(arg6); /* Always in memory */ \ + __asm__ volatile ( \ + "pushl %[_arg6]\n\t" \ + "pushl %%ebp\n\t" \ + "movl 4(%%esp),%%ebp\n\t" \ + "int $0x80\n\t" \ + "popl %%ebp\n\t" \ + "addl $4,%%esp\n\t" \ + : "+a"(_eax) /* %eax */ \ + : "b"(arg1), /* %ebx */ \ + "c"(arg2), /* %ecx */ \ + "d"(arg3), /* %edx */ \ + "S"(arg4), /* %esi */ \ + "D"(arg5), /* %edi */ \ + [_arg6]"m"(_arg6) /* memory */ \ + : "memory", "cc" \ + ); \ + _eax; \ +}) + +/* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ +__asm__ (".section .text\n" + ".weak _start\n" + "_start:\n" + "pop %eax\n" // argc (first arg, %eax) + "mov %esp, %ebx\n" // argv[] (second arg, %ebx) + "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before + "sub $4, %esp\n" // the call instruction (args are aligned) + "push %ecx\n" // push all registers on the stack so that we + "push %ebx\n" // support both regparm and plain stack modes + "push %eax\n" + "call main\n" // main() returns the status code in %eax + "mov %eax, %ebx\n" // retrieve exit code (32-bit int) + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now + "hlt\n" // ensure it does not + ""); + +#endif // _NOLIBC_ARCH_I386_H diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h new file mode 100644 index 000000000000..5fc5b8029bff --- /dev/null +++ b/tools/include/nolibc/arch-mips.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * MIPS specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_MIPS_H +#define _NOLIBC_ARCH_MIPS_H + +/* O_* macros for fcntl/open are architecture-specific */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_APPEND 0x0008 +#define O_NONBLOCK 0x0080 +#define O_CREAT 0x0100 +#define O_TRUNC 0x0200 +#define O_EXCL 0x0400 +#define O_NOCTTY 0x0800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall. 88 bytes are returned by the + * syscall. + */ +struct sys_stat_struct { + unsigned int st_dev; + long st_pad1[3]; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_pad2[2]; + long st_size; + long st_pad3; + + long st_atime; + long st_atime_nsec; + long st_mtime; + long st_mtime_nsec; + + long st_ctime; + long st_ctime_nsec; + long st_blksize; + long st_blocks; + long st_pad4[14]; +}; + +/* Syscalls for MIPS ABI O32 : + * - WARNING! there's always a delayed slot! + * - WARNING again, the syntax is different, registers take a '$' and numbers + * do not. + * - registers are 32-bit + * - stack is 8-byte aligned + * - syscall number is passed in v0 (starts at 0xfa0). + * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to + * leave some room in the stack for the callee to save a0..a3 if needed. + * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are + * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as + * scall32-o32.S in the kernel sources. + * - the system call is performed by calling "syscall" + * - syscall return comes in v0, and register a3 needs to be checked to know + * if an error occurred, in which case errno is in v0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "r"(_num) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 = (long)(arg5); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "sw %7, 16($sp)\n" \ + "syscall\n " \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ + ); \ + _arg4 ? -_num : _num; \ +}) + +/* startup code, note that it's called __start on MIPS */ +__asm__ (".section .text\n" + ".weak __start\n" + ".set nomips16\n" + ".set noreorder\n" + ".option pic0\n" + ".ent __start\n" + "__start:\n" + "lw $a0,($sp)\n" // argc was in the stack + "addiu $a1, $sp, 4\n" // argv = sp + 4 + "sll $a2, $a0, 2\n" // a2 = argc * 4 + "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... + "addiu $a2, $a2, 4\n" // ... + 4 + "li $t0, -8\n" + "and $sp, $sp, $t0\n" // sp must be 8-byte aligned + "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! + "jal main\n" // main() returns the status code, we'll exit with it. + "nop\n" // delayed slot + "move $a0, $v0\n" // retrieve 32-bit exit code from v0 + "li $v0, 4001\n" // NR_exit == 4001 + "syscall\n" + ".end __start\n" + ""); + +#endif // _NOLIBC_ARCH_MIPS_H diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h new file mode 100644 index 000000000000..95e2b7924925 --- /dev/null +++ b/tools/include/nolibc/arch-riscv.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * RISCV (32 and 64) specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_RISCV_H +#define _NOLIBC_ARCH_RISCV_H + +/* O_* macros for fcntl/open are architecture-specific */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x100 +#define O_EXCL 0x200 +#define O_NOCTTY 0x400 +#define O_TRUNC 0x1000 +#define O_APPEND 0x2000 +#define O_NONBLOCK 0x4000 +#define O_DIRECTORY 0x200000 + +struct sys_stat_struct { + unsigned long st_dev; /* Device. */ + unsigned long st_ino; /* File serial number. */ + unsigned int st_mode; /* File mode. */ + unsigned int st_nlink; /* Link count. */ + unsigned int st_uid; /* User ID of the file's owner. */ + unsigned int st_gid; /* Group ID of the file's group. */ + unsigned long st_rdev; /* Device number, if device. */ + unsigned long __pad1; + long st_size; /* Size of file, in bytes. */ + int st_blksize; /* Optimal block size for I/O. */ + int __pad2; + long st_blocks; /* Number 512-byte blocks allocated. */ + long st_atime; /* Time of last access. */ + unsigned long st_atime_nsec; + long st_mtime; /* Time of last modification. */ + unsigned long st_mtime_nsec; + long st_ctime; /* Time of last status change. */ + unsigned long st_ctime_nsec; + unsigned int __unused4; + unsigned int __unused5; +}; + +#if __riscv_xlen == 64 +#define PTRLOG "3" +#define SZREG "8" +#elif __riscv_xlen == 32 +#define PTRLOG "2" +#define SZREG "4" +#endif + +/* Syscalls for RISCV : + * - stack is 16-byte aligned + * - syscall number is passed in a7 + * - arguments are in a0, a1, a2, a3, a4, a5 + * - the system call is performed by calling ecall + * - syscall return comes in a0 + * - the arguments are cast to long and assigned into the target + * registers which are then simply passed as registers to the asm code, + * so that we don't have to experience issues with register constraints. + * + * On riscv, select() is not implemented so we have to use pselect6(). + */ +#define __ARCH_WANT_SYS_PSELECT6 + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0"); \ + \ + __asm__ volatile ( \ + "ecall\n\t" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "ecall\n\t" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 __asm__ ("a4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 __asm__ ("a4") = (long)(arg5); \ + register long _arg6 __asm__ ("a5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +__asm__ (".section .text\n" + ".weak _start\n" + "_start:\n" + ".option push\n" + ".option norelax\n" + "lla gp, __global_pointer$\n" + ".option pop\n" + "ld a0, 0(sp)\n" // argc (a0) was in the stack + "add a1, sp, "SZREG"\n" // argv (a1) = sp + "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... + "add a2, a2, "SZREG"\n" // + SZREG (skip null) + "add a2,a2,a1\n" // + argv + "andi sp,a1,-16\n" // sp must be 16-byte aligned + "call main\n" // main() returns the status code, we'll exit with it. + "li a7, 93\n" // NR_exit == 93 + "ecall\n" + ""); + +#endif // _NOLIBC_ARCH_RISCV_H diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h new file mode 100644 index 000000000000..0e1e9eb8545d --- /dev/null +++ b/tools/include/nolibc/arch-x86_64.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * x86_64 specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_X86_64_H +#define _NOLIBC_ARCH_X86_64_H + +/* O_* macros for fcntl/open are architecture-specific */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, equivalent to stat64(). The + * syscall returns 116 bytes and stops in the middle of __unused. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + + long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +/* Syscalls for x86_64 : + * - registers are 64-bit + * - syscall number is passed in rax + * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively + * - the system call is performed by calling the syscall instruction + * - syscall return comes in rax + * - rcx and r11 are clobbered, others are preserved. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 + * Calling Conventions. + * + * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home + * + */ + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + register long _arg4 __asm__ ("r10") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + register long _arg4 __asm__ ("r10") = (long)(arg4); \ + register long _arg5 __asm__ ("r8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + register long _arg4 __asm__ ("r10") = (long)(arg4); \ + register long _arg5 __asm__ ("r8") = (long)(arg5); \ + register long _arg6 __asm__ ("r9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ +__asm__ (".section .text\n" + ".weak _start\n" + "_start:\n" + "pop %rdi\n" // argc (first arg, %rdi) + "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) + "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call + "call main\n" // main() returns the status code, we'll exit with it. + "mov %eax, %edi\n" // retrieve exit code (32 bit) + "mov $60, %eax\n" // NR_exit == 60 + "syscall\n" // really exit + "hlt\n" // ensure it does not return + ""); + +#endif // _NOLIBC_ARCH_X86_64_H diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h new file mode 100644 index 000000000000..4c6992321b0d --- /dev/null +++ b/tools/include/nolibc/arch.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +/* Below comes the architecture-specific code. For each architecture, we have + * the syscall declarations and the _start code definition. This is the only + * global part. On all architectures the kernel puts everything in the stack + * before jumping to _start just above us, without any return address (_start + * is not a function but an entry pint). So at the stack pointer we find argc. + * Then argv[] begins, and ends at the first NULL. Then we have envp which + * starts and ends with a NULL as well. So envp=argv+argc+1. + */ + +#ifndef _NOLIBC_ARCH_H +#define _NOLIBC_ARCH_H + +#if defined(__x86_64__) +#include "arch-x86_64.h" +#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +#include "arch-i386.h" +#elif defined(__ARM_EABI__) +#include "arch-arm.h" +#elif defined(__aarch64__) +#include "arch-aarch64.h" +#elif defined(__mips__) && defined(_ABIO32) +#include "arch-mips.h" +#elif defined(__riscv) +#include "arch-riscv.h" +#endif + +#endif /* _NOLIBC_ARCH_H */ diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h new file mode 100644 index 000000000000..e3000b2992d7 --- /dev/null +++ b/tools/include/nolibc/ctype.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * ctype function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_CTYPE_H +#define _NOLIBC_CTYPE_H + +#include "std.h" + +/* + * As much as possible, please keep functions alphabetically sorted. + */ + +static __attribute__((unused)) +int isascii(int c) +{ + /* 0x00..0x7f */ + return (unsigned int)c <= 0x7f; +} + +static __attribute__((unused)) +int isblank(int c) +{ + return c == '\t' || c == ' '; +} + +static __attribute__((unused)) +int iscntrl(int c) +{ + /* 0x00..0x1f, 0x7f */ + return (unsigned int)c < 0x20 || c == 0x7f; +} + +static __attribute__((unused)) +int isdigit(int c) +{ + return (unsigned int)(c - '0') < 10; +} + +static __attribute__((unused)) +int isgraph(int c) +{ + /* 0x21..0x7e */ + return (unsigned int)(c - 0x21) < 0x5e; +} + +static __attribute__((unused)) +int islower(int c) +{ + return (unsigned int)(c - 'a') < 26; +} + +static __attribute__((unused)) +int isprint(int c) +{ + /* 0x20..0x7e */ + return (unsigned int)(c - 0x20) < 0x5f; +} + +static __attribute__((unused)) +int isspace(int c) +{ + /* \t is 0x9, \n is 0xA, \v is 0xB, \f is 0xC, \r is 0xD */ + return ((unsigned int)c == ' ') || (unsigned int)(c - 0x09) < 5; +} + +static __attribute__((unused)) +int isupper(int c) +{ + return (unsigned int)(c - 'A') < 26; +} + +static __attribute__((unused)) +int isxdigit(int c) +{ + return isdigit(c) || (unsigned int)(c - 'A') < 6 || (unsigned int)(c - 'a') < 6; +} + +static __attribute__((unused)) +int isalpha(int c) +{ + return islower(c) || isupper(c); +} + +static __attribute__((unused)) +int isalnum(int c) +{ + return isalpha(c) || isdigit(c); +} + +static __attribute__((unused)) +int ispunct(int c) +{ + return isgraph(c) && !isalnum(c); +} + +#endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h new file mode 100644 index 000000000000..06893d6dfb7a --- /dev/null +++ b/tools/include/nolibc/errno.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Minimal errno definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ERRNO_H +#define _NOLIBC_ERRNO_H + +#include <asm/errno.h> + +/* this way it will be removed if unused */ +static int errno; + +#ifndef NOLIBC_IGNORE_ERRNO +#define SET_ERRNO(v) do { errno = (v); } while (0) +#else +#define SET_ERRNO(v) do { } while (0) +#endif + + +/* errno codes all ensure that they will not conflict with a valid pointer + * because they all correspond to the highest addressable memory page. + */ +#define MAX_ERRNO 4095 + +#endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index c1c285fe494a..b2bc48d3cfe4 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -57,22 +57,32 @@ * having to specify anything. * * Finally some very common libc-level functions are provided. It is the case - * for a few functions usually found in string.h, ctype.h, or stdlib.h. Nothing - * is currently provided regarding stdio emulation. + * for a few functions usually found in string.h, ctype.h, or stdlib.h. * - * The macro NOLIBC is always defined, so that it is possible for a program to - * check this macro to know if it is being built against and decide to disable - * some features or simply not to include some standard libc files. - * - * Ideally this file should be split in multiple files for easier long term - * maintenance, but provided as a single file as it is now, it's quite - * convenient to use. Maybe some variations involving a set of includes at the - * top could work. + * The nolibc.h file is only a convenient entry point which includes all other + * files. It also defines the NOLIBC macro, so that it is possible for a + * program to check this macro to know if it is being built against and decide + * to disable some features or simply not to include some standard libc files. * * A simple static executable may be built this way : * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ * -static -include nolibc.h -o hello hello.c -lgcc * + * Simple programs meant to be reasonably portable to various libc and using + * only a few common includes, may also be built by simply making the include + * path point to the nolibc directory: + * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + * -I../nolibc -o hello hello.c -lgcc + * + * The available standard (but limited) include files are: + * ctype.h, errno.h, signal.h, stdio.h, stdlib.h, string.h, time.h + * + * In addition, the following ones are expected to be provided by the compiler: + * float.h, stdarg.h, stddef.h + * + * The following ones which are part to the C standard are not provided: + * assert.h, locale.h, math.h, setjmp.h, limits.h + * * A very useful calling convention table may be found here : * http://man7.org/linux/man-pages/man2/syscall.2.html * @@ -80,2502 +90,22 @@ * https://w3challs.com/syscalls/ * */ +#ifndef _NOLIBC_H +#define _NOLIBC_H -#include <asm/unistd.h> -#include <asm/ioctls.h> -#include <asm/errno.h> -#include <linux/fs.h> -#include <linux/loop.h> -#include <linux/time.h> +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" +#include "ctype.h" +#include "signal.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "time.h" +#include "unistd.h" +/* Used by programs to avoid std includes */ #define NOLIBC -/* this way it will be removed if unused */ -static int errno; - -#ifndef NOLIBC_IGNORE_ERRNO -#define SET_ERRNO(v) do { errno = (v); } while (0) -#else -#define SET_ERRNO(v) do { } while (0) -#endif - -/* errno codes all ensure that they will not conflict with a valid pointer - * because they all correspond to the highest addressable memory page. - */ -#define MAX_ERRNO 4095 - -/* Declare a few quite common macros and types that usually are in stdlib.h, - * stdint.h, ctype.h, unistd.h and a few other common locations. - */ - -#define NULL ((void *)0) - -/* stdint types */ -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef unsigned short uint16_t; -typedef signed short int16_t; -typedef unsigned int uint32_t; -typedef signed int int32_t; -typedef unsigned long long uint64_t; -typedef signed long long int64_t; -typedef unsigned long size_t; -typedef signed long ssize_t; -typedef unsigned long uintptr_t; -typedef signed long intptr_t; -typedef signed long ptrdiff_t; - -/* for stat() */ -typedef unsigned int dev_t; -typedef unsigned long ino_t; -typedef unsigned int mode_t; -typedef signed int pid_t; -typedef unsigned int uid_t; -typedef unsigned int gid_t; -typedef unsigned long nlink_t; -typedef signed long off_t; -typedef signed long blksize_t; -typedef signed long blkcnt_t; -typedef signed long time_t; - -/* for poll() */ -struct pollfd { - int fd; - short int events; - short int revents; -}; - -/* for getdents64() */ -struct linux_dirent64 { - uint64_t d_ino; - int64_t d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[]; -}; - -/* commonly an fd_set represents 256 FDs */ -#define FD_SETSIZE 256 -typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set; - -/* needed by wait4() */ -struct rusage { - struct timeval ru_utime; - struct timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; -}; - -/* stat flags (WARNING, octal here) */ -#define S_IFDIR 0040000 -#define S_IFCHR 0020000 -#define S_IFBLK 0060000 -#define S_IFREG 0100000 -#define S_IFIFO 0010000 -#define S_IFLNK 0120000 -#define S_IFSOCK 0140000 -#define S_IFMT 0170000 - -#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) -#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) -#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) -#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) -#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) -#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) -#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) - -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 - -/* all the *at functions */ -#ifndef AT_FDCWD -#define AT_FDCWD -100 -#endif - -/* lseek */ -#define SEEK_SET 0 -#define SEEK_CUR 1 -#define SEEK_END 2 - -/* reboot */ -#define LINUX_REBOOT_MAGIC1 0xfee1dead -#define LINUX_REBOOT_MAGIC2 0x28121969 -#define LINUX_REBOOT_CMD_HALT 0xcdef0123 -#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc -#define LINUX_REBOOT_CMD_RESTART 0x01234567 -#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 - - -/* The format of the struct as returned by the libc to the application, which - * significantly differs from the format returned by the stat() syscall flavours. - */ -struct stat { - dev_t st_dev; /* ID of device containing file */ - ino_t st_ino; /* inode number */ - mode_t st_mode; /* protection */ - nlink_t st_nlink; /* number of hard links */ - uid_t st_uid; /* user ID of owner */ - gid_t st_gid; /* group ID of owner */ - dev_t st_rdev; /* device ID (if special file) */ - off_t st_size; /* total size, in bytes */ - blksize_t st_blksize; /* blocksize for file system I/O */ - blkcnt_t st_blocks; /* number of 512B blocks allocated */ - time_t st_atime; /* time of last access */ - time_t st_mtime; /* time of last modification */ - time_t st_ctime; /* time of last status change */ -}; - -#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) -#define WIFEXITED(status) (((status) & 0x7f) == 0) - -/* for SIGCHLD */ -#include <asm/signal.h> - -/* Below comes the architecture-specific code. For each architecture, we have - * the syscall declarations and the _start code definition. This is the only - * global part. On all architectures the kernel puts everything in the stack - * before jumping to _start just above us, without any return address (_start - * is not a function but an entry pint). So at the stack pointer we find argc. - * Then argv[] begins, and ends at the first NULL. Then we have envp which - * starts and ends with a NULL as well. So envp=argv+argc+1. - */ - -#if defined(__x86_64__) -/* Syscalls for x86_64 : - * - registers are 64-bit - * - syscall number is passed in rax - * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively - * - the system call is performed by calling the syscall instruction - * - syscall return comes in rax - * - rcx and r11 are clobbered, others are preserved. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 - * Calling Conventions. - * - * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI - * - */ - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - register long _arg4 asm("r10") = (long)(arg4); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - register long _arg4 asm("r10") = (long)(arg4); \ - register long _arg5 asm("r8") = (long)(arg5); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - register long _arg4 asm("r10") = (long)(arg4); \ - register long _arg5 asm("r8") = (long)(arg5); \ - register long _arg6 asm("r9") = (long)(arg6); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -/* startup code */ -/* - * x86-64 System V ABI mandates: - * 1) %rsp must be 16-byte aligned right before the function call. - * 2) The deepest stack frame should be zero (the %rbp). - * - */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - "pop %rdi\n" // argc (first arg, %rdi) - "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) - "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) - "xor %ebp, %ebp\n" // zero the stack frame - "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call - "call main\n" // main() returns the status code, we'll exit with it. - "mov %eax, %edi\n" // retrieve exit code (32 bit) - "mov $60, %eax\n" // NR_exit == 60 - "syscall\n" // really exit - "hlt\n" // ensure it does not return - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x10000 - -/* The struct returned by the stat() syscall, equivalent to stat64(). The - * syscall returns 116 bytes and stops in the middle of __unused. - */ -struct sys_stat_struct { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned int st_mode; - unsigned int st_uid; - - unsigned int st_gid; - unsigned int __pad0; - unsigned long st_rdev; - long st_size; - long st_blksize; - - long st_blocks; - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - long __unused[3]; -}; - -#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) -/* Syscalls for i386 : - * - mostly similar to x86_64 - * - registers are 32-bit - * - syscall number is passed in eax - * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively - * - all registers are preserved (except eax of course) - * - the system call is performed by calling int $0x80 - * - syscall return comes in eax - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, i386 supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - register long _arg3 asm("edx") = (long)(arg3); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - register long _arg3 asm("edx") = (long)(arg3); \ - register long _arg4 asm("esi") = (long)(arg4); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - register long _arg3 asm("edx") = (long)(arg3); \ - register long _arg4 asm("esi") = (long)(arg4); \ - register long _arg5 asm("edi") = (long)(arg5); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -/* startup code */ -/* - * i386 System V ABI mandates: - * 1) last pushed argument must be 16-byte aligned. - * 2) The deepest stack frame should be set to zero - * - */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - "pop %eax\n" // argc (first arg, %eax) - "mov %esp, %ebx\n" // argv[] (second arg, %ebx) - "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) - "xor %ebp, %ebp\n" // zero the stack frame - "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before - "sub $4, %esp\n" // the call instruction (args are aligned) - "push %ecx\n" // push all registers on the stack so that we - "push %ebx\n" // support both regparm and plain stack modes - "push %eax\n" - "call main\n" // main() returns the status code in %eax - "mov %eax, %ebx\n" // retrieve exit code (32-bit int) - "movl $1, %eax\n" // NR_exit == 1 - "int $0x80\n" // exit now - "hlt\n" // ensure it does not - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x10000 - -/* The struct returned by the stat() syscall, 32-bit only, the syscall returns - * exactly 56 bytes (stops before the unused array). - */ -struct sys_stat_struct { - unsigned long st_dev; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - - unsigned long st_rdev; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - - unsigned long st_ctime; - unsigned long st_ctime_nsec; - unsigned long __unused[2]; -}; - -#elif defined(__ARM_EABI__) -/* Syscalls for ARM in ARM or Thumb modes : - * - registers are 32-bit - * - stack is 8-byte aligned - * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) - * - syscall number is passed in r7 - * - arguments are in r0, r1, r2, r3, r4, r5 - * - the system call is performed by calling svc #0 - * - syscall return comes in r0. - * - only lr is clobbered. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, ARM supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0"); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - register long _arg3 asm("r2") = (long)(arg3); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - register long _arg3 asm("r2") = (long)(arg3); \ - register long _arg4 asm("r3") = (long)(arg4); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - register long _arg3 asm("r2") = (long)(arg3); \ - register long _arg4 asm("r3") = (long)(arg4); \ - register long _arg5 asm("r4") = (long)(arg5); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" -#if defined(__THUMBEB__) || defined(__THUMBEL__) - /* We enter here in 32-bit mode but if some previous functions were in - * 16-bit mode, the assembler cannot know, so we need to tell it we're in - * 32-bit now, then switch to 16-bit (is there a better way to do it than - * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that - * it generates correct instructions. Note that we do not support thumb1. - */ - ".code 32\n" - "add r0, pc, #1\n" - "bx r0\n" - ".code 16\n" -#endif - "pop {%r0}\n" // argc was in the stack - "mov %r1, %sp\n" // argv = sp - "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... - "add %r2, %r2, $4\n" // ... + 4 - "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the - "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) - "bl main\n" // main() returns the status code, we'll exit with it. - "movs r7, $1\n" // NR_exit == 1 - "svc $0x00\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x4000 - -/* The struct returned by the stat() syscall, 32-bit only, the syscall returns - * exactly 56 bytes (stops before the unused array). In big endian, the format - * differs as devices are returned as short only. - */ -struct sys_stat_struct { -#if defined(__ARMEB__) - unsigned short st_dev; - unsigned short __pad1; -#else - unsigned long st_dev; -#endif - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; -#if defined(__ARMEB__) - unsigned short st_rdev; - unsigned short __pad2; -#else - unsigned long st_rdev; -#endif - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - unsigned long __unused[2]; -}; - -#elif defined(__aarch64__) -/* Syscalls for AARCH64 : - * - registers are 64-bit - * - stack is 16-byte aligned - * - syscall number is passed in x8 - * - arguments are in x0, x1, x2, x3, x4, x5 - * - the system call is performed by calling svc 0 - * - syscall return comes in x0. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - * On aarch64, select() is not implemented so we have to use pselect6(). - */ -#define __ARCH_WANT_SYS_PSELECT6 - -#define my_syscall0(num) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0"); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - register long _arg4 asm("x3") = (long)(arg4); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - register long _arg4 asm("x3") = (long)(arg4); \ - register long _arg5 asm("x4") = (long)(arg5); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - register long _arg4 asm("x3") = (long)(arg4); \ - register long _arg5 asm("x4") = (long)(arg5); \ - register long _arg6 asm("x5") = (long)(arg6); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - "ldr x0, [sp]\n" // argc (x0) was in the stack - "add x1, sp, 8\n" // argv (x1) = sp - "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... - "add x2, x2, 8\n" // + 8 (skip null) - "add x2, x2, x1\n" // + argv - "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee - "bl main\n" // main() returns the status code, we'll exit with it. - "mov x8, 93\n" // NR_exit == 93 - "svc #0\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x4000 - -/* The struct returned by the newfstatat() syscall. Differs slightly from the - * x86_64's stat one by field ordering, so be careful. - */ -struct sys_stat_struct { - unsigned long st_dev; - unsigned long st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - - unsigned long st_rdev; - unsigned long __pad1; - long st_size; - int st_blksize; - int __pad2; - - long st_blocks; - long st_atime; - unsigned long st_atime_nsec; - long st_mtime; - - unsigned long st_mtime_nsec; - long st_ctime; - unsigned long st_ctime_nsec; - unsigned int __unused[2]; -}; - -#elif defined(__mips__) && defined(_ABIO32) -/* Syscalls for MIPS ABI O32 : - * - WARNING! there's always a delayed slot! - * - WARNING again, the syntax is different, registers take a '$' and numbers - * do not. - * - registers are 32-bit - * - stack is 8-byte aligned - * - syscall number is passed in v0 (starts at 0xfa0). - * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to - * leave some room in the stack for the callee to save a0..a3 if needed. - * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are - * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as - * scall32-o32.S in the kernel sources. - * - the system call is performed by calling "syscall" - * - syscall return comes in v0, and register a3 needs to be checked to know - * if an error occurred, in which case errno is in v0. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - */ - -#define my_syscall0(num) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "r"(_num) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r" (_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - register long _arg5 = (long)(arg5); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "sw %7, 16($sp)\n" \ - "syscall\n " \ - "addiu $sp, $sp, 32\n" \ - : "=r" (_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -/* startup code, note that it's called __start on MIPS */ -asm(".section .text\n" - ".set nomips16\n" - ".global __start\n" - ".set noreorder\n" - ".option pic0\n" - ".ent __start\n" - "__start:\n" - "lw $a0,($sp)\n" // argc was in the stack - "addiu $a1, $sp, 4\n" // argv = sp + 4 - "sll $a2, $a0, 2\n" // a2 = argc * 4 - "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... - "addiu $a2, $a2, 4\n" // ... + 4 - "li $t0, -8\n" - "and $sp, $sp, $t0\n" // sp must be 8-byte aligned - "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! - "jal main\n" // main() returns the status code, we'll exit with it. - "nop\n" // delayed slot - "move $a0, $v0\n" // retrieve 32-bit exit code from v0 - "li $v0, 4001\n" // NR_exit == 4001 - "syscall\n" - ".end __start\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_APPEND 0x0008 -#define O_NONBLOCK 0x0080 -#define O_CREAT 0x0100 -#define O_TRUNC 0x0200 -#define O_EXCL 0x0400 -#define O_NOCTTY 0x0800 -#define O_DIRECTORY 0x10000 - -/* The struct returned by the stat() syscall. 88 bytes are returned by the - * syscall. - */ -struct sys_stat_struct { - unsigned int st_dev; - long st_pad1[3]; - unsigned long st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - unsigned int st_rdev; - long st_pad2[2]; - long st_size; - long st_pad3; - long st_atime; - long st_atime_nsec; - long st_mtime; - long st_mtime_nsec; - long st_ctime; - long st_ctime_nsec; - long st_blksize; - long st_blocks; - long st_pad4[14]; -}; - -#elif defined(__riscv) - -#if __riscv_xlen == 64 -#define PTRLOG "3" -#define SZREG "8" -#elif __riscv_xlen == 32 -#define PTRLOG "2" -#define SZREG "4" -#endif - -/* Syscalls for RISCV : - * - stack is 16-byte aligned - * - syscall number is passed in a7 - * - arguments are in a0, a1, a2, a3, a4, a5 - * - the system call is performed by calling ecall - * - syscall return comes in a0 - * - the arguments are cast to long and assigned into the target - * registers which are then simply passed as registers to the asm code, - * so that we don't have to experience issues with register constraints. - */ - -#define my_syscall0(num) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0"); \ - \ - asm volatile ( \ - "ecall\n\t" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - \ - asm volatile ( \ - "ecall\n\t" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - register long _arg5 asm("a4") = (long)(arg5); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - register long _arg5 asm("a4") = (long)(arg5); \ - register long _arg6 asm("a5") = (long)(arg6); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - ".option push\n" - ".option norelax\n" - "lla gp, __global_pointer$\n" - ".option pop\n" - "ld a0, 0(sp)\n" // argc (a0) was in the stack - "add a1, sp, "SZREG"\n" // argv (a1) = sp - "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... - "add a2, a2, "SZREG"\n" // + SZREG (skip null) - "add a2,a2,a1\n" // + argv - "andi sp,a1,-16\n" // sp must be 16-byte aligned - "call main\n" // main() returns the status code, we'll exit with it. - "li a7, 93\n" // NR_exit == 93 - "ecall\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 - -struct sys_stat_struct { - unsigned long st_dev; /* Device. */ - unsigned long st_ino; /* File serial number. */ - unsigned int st_mode; /* File mode. */ - unsigned int st_nlink; /* Link count. */ - unsigned int st_uid; /* User ID of the file's owner. */ - unsigned int st_gid; /* Group ID of the file's group. */ - unsigned long st_rdev; /* Device number, if device. */ - unsigned long __pad1; - long st_size; /* Size of file, in bytes. */ - int st_blksize; /* Optimal block size for I/O. */ - int __pad2; - long st_blocks; /* Number 512-byte blocks allocated. */ - long st_atime; /* Time of last access. */ - unsigned long st_atime_nsec; - long st_mtime; /* Time of last modification. */ - unsigned long st_mtime_nsec; - long st_ctime; /* Time of last status change. */ - unsigned long st_ctime_nsec; - unsigned int __unused4; - unsigned int __unused5; -}; - -#endif - - -/* Below are the C functions used to declare the raw syscalls. They try to be - * architecture-agnostic, and return either a success or -errno. Declaring them - * static will lead to them being inlined in most cases, but it's still possible - * to reference them by a pointer if needed. - */ -static __attribute__((unused)) -void *sys_brk(void *addr) -{ - return (void *)my_syscall1(__NR_brk, addr); -} - -static __attribute__((noreturn,unused)) -void sys_exit(int status) -{ - my_syscall1(__NR_exit, status & 255); - while(1); // shut the "noreturn" warnings. -} - -static __attribute__((unused)) -int sys_chdir(const char *path) -{ - return my_syscall1(__NR_chdir, path); -} - -static __attribute__((unused)) -int sys_chmod(const char *path, mode_t mode) -{ -#ifdef __NR_fchmodat - return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#elif defined(__NR_chmod) - return my_syscall2(__NR_chmod, path, mode); -#else -#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod() -#endif -} - -static __attribute__((unused)) -int sys_chown(const char *path, uid_t owner, gid_t group) -{ -#ifdef __NR_fchownat - return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#elif defined(__NR_chown) - return my_syscall3(__NR_chown, path, owner, group); -#else -#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown() -#endif -} - -static __attribute__((unused)) -int sys_chroot(const char *path) -{ - return my_syscall1(__NR_chroot, path); -} - -static __attribute__((unused)) -int sys_close(int fd) -{ - return my_syscall1(__NR_close, fd); -} - -static __attribute__((unused)) -int sys_dup(int fd) -{ - return my_syscall1(__NR_dup, fd); -} - -#ifdef __NR_dup3 -static __attribute__((unused)) -int sys_dup3(int old, int new, int flags) -{ - return my_syscall3(__NR_dup3, old, new, flags); -} -#endif - -static __attribute__((unused)) -int sys_dup2(int old, int new) -{ -#ifdef __NR_dup3 - return my_syscall3(__NR_dup3, old, new, 0); -#elif defined(__NR_dup2) - return my_syscall2(__NR_dup2, old, new); -#else -#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2() -#endif -} - -static __attribute__((unused)) -int sys_execve(const char *filename, char *const argv[], char *const envp[]) -{ - return my_syscall3(__NR_execve, filename, argv, envp); -} - -static __attribute__((unused)) -pid_t sys_fork(void) -{ -#ifdef __NR_clone - /* note: some archs only have clone() and not fork(). Different archs - * have a different API, but most archs have the flags on first arg and - * will not use the rest with no other flag. - */ - return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); -#elif defined(__NR_fork) - return my_syscall0(__NR_fork); -#else -#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork() -#endif -} - -static __attribute__((unused)) -int sys_fsync(int fd) -{ - return my_syscall1(__NR_fsync, fd); -} - -static __attribute__((unused)) -int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) -{ - return my_syscall3(__NR_getdents64, fd, dirp, count); -} - -static __attribute__((unused)) -pid_t sys_getpgid(pid_t pid) -{ - return my_syscall1(__NR_getpgid, pid); -} - -static __attribute__((unused)) -pid_t sys_getpgrp(void) -{ - return sys_getpgid(0); -} - -static __attribute__((unused)) -pid_t sys_getpid(void) -{ - return my_syscall0(__NR_getpid); -} - -static __attribute__((unused)) -pid_t sys_gettid(void) -{ - return my_syscall0(__NR_gettid); -} - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return my_syscall2(__NR_gettimeofday, tv, tz); -} - -static __attribute__((unused)) -int sys_ioctl(int fd, unsigned long req, void *value) -{ - return my_syscall3(__NR_ioctl, fd, req, value); -} - -static __attribute__((unused)) -int sys_kill(pid_t pid, int signal) -{ - return my_syscall2(__NR_kill, pid, signal); -} - -static __attribute__((unused)) -int sys_link(const char *old, const char *new) -{ -#ifdef __NR_linkat - return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#elif defined(__NR_link) - return my_syscall2(__NR_link, old, new); -#else -#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link() -#endif -} - -static __attribute__((unused)) -off_t sys_lseek(int fd, off_t offset, int whence) -{ - return my_syscall3(__NR_lseek, fd, offset, whence); -} - -static __attribute__((unused)) -int sys_mkdir(const char *path, mode_t mode) -{ -#ifdef __NR_mkdirat - return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#elif defined(__NR_mkdir) - return my_syscall2(__NR_mkdir, path, mode); -#else -#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir() -#endif -} - -static __attribute__((unused)) -long sys_mknod(const char *path, mode_t mode, dev_t dev) -{ -#ifdef __NR_mknodat - return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#elif defined(__NR_mknod) - return my_syscall3(__NR_mknod, path, mode, dev); -#else -#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod() -#endif -} - -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ -#ifdef __NR_openat - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -#elif defined(__NR_open) - return my_syscall3(__NR_open, path, flags, mode); -#else -#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open() -#endif -} - -static __attribute__((unused)) -int sys_pivot_root(const char *new, const char *old) -{ - return my_syscall2(__NR_pivot_root, new, old); -} - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ -#if defined(__NR_ppoll) - struct timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); -#else -#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll() -#endif -} - -static __attribute__((unused)) -ssize_t sys_read(int fd, void *buf, size_t count) -{ - return my_syscall3(__NR_read, fd, buf, count); -} - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int sys_sched_yield(void) -{ - return my_syscall0(__NR_sched_yield); -} - -static __attribute__((unused)) -int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ -#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) - struct sel_arg_struct { - unsigned long n; - fd_set *r, *w, *e; - struct timeval *t; - } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; - return my_syscall1(__NR_select, &arg); -#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) - struct timespec t; - - if (timeout) { - t.tv_sec = timeout->tv_sec; - t.tv_nsec = timeout->tv_usec * 1000; - } - return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#elif defined(__NR__newselect) || defined(__NR_select) -#ifndef __NR__newselect -#define __NR__newselect __NR_select -#endif - return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); -#else -#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select() -#endif -} - -static __attribute__((unused)) -int sys_setpgid(pid_t pid, pid_t pgid) -{ - return my_syscall2(__NR_setpgid, pid, pgid); -} - -static __attribute__((unused)) -pid_t sys_setsid(void) -{ - return my_syscall0(__NR_setsid); -} - -static __attribute__((unused)) -int sys_stat(const char *path, struct stat *buf) -{ - struct sys_stat_struct stat; - long ret; - -#ifdef __NR_newfstatat - /* only solution for arm64 */ - ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); -#elif defined(__NR_stat) - ret = my_syscall2(__NR_stat, path, &stat); -#else -#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat() -#endif - buf->st_dev = stat.st_dev; - buf->st_ino = stat.st_ino; - buf->st_mode = stat.st_mode; - buf->st_nlink = stat.st_nlink; - buf->st_uid = stat.st_uid; - buf->st_gid = stat.st_gid; - buf->st_rdev = stat.st_rdev; - buf->st_size = stat.st_size; - buf->st_blksize = stat.st_blksize; - buf->st_blocks = stat.st_blocks; - buf->st_atime = stat.st_atime; - buf->st_mtime = stat.st_mtime; - buf->st_ctime = stat.st_ctime; - return ret; -} - - -static __attribute__((unused)) -int sys_symlink(const char *old, const char *new) -{ -#ifdef __NR_symlinkat - return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#elif defined(__NR_symlink) - return my_syscall2(__NR_symlink, old, new); -#else -#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink() -#endif -} - -static __attribute__((unused)) -mode_t sys_umask(mode_t mode) -{ - return my_syscall1(__NR_umask, mode); -} - -static __attribute__((unused)) -int sys_umount2(const char *path, int flags) -{ - return my_syscall2(__NR_umount2, path, flags); -} - -static __attribute__((unused)) -int sys_unlink(const char *path) -{ -#ifdef __NR_unlinkat - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#elif defined(__NR_unlink) - return my_syscall1(__NR_unlink, path); -#else -#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink() -#endif -} - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return my_syscall4(__NR_wait4, pid, status, options, rusage); -} - -static __attribute__((unused)) -pid_t sys_waitpid(pid_t pid, int *status, int options) -{ - return sys_wait4(pid, status, options, 0); -} - -static __attribute__((unused)) -pid_t sys_wait(int *status) -{ - return sys_waitpid(-1, status, 0); -} - -static __attribute__((unused)) -ssize_t sys_write(int fd, const void *buf, size_t count) -{ - return my_syscall3(__NR_write, fd, buf, count); -} - - -/* Below are the libc-compatible syscalls which return x or -1 and set errno. - * They rely on the functions above. Similarly they're marked static so that it - * is possible to assign pointers to them if needed. - */ - -static __attribute__((unused)) -int brk(void *addr) -{ - void *ret = sys_brk(addr); - - if (!ret) { - SET_ERRNO(ENOMEM); - return -1; - } - return 0; -} - -static __attribute__((noreturn,unused)) -void exit(int status) -{ - sys_exit(status); -} - -static __attribute__((unused)) -int chdir(const char *path) -{ - int ret = sys_chdir(path); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int chmod(const char *path, mode_t mode) -{ - int ret = sys_chmod(path, mode); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int chown(const char *path, uid_t owner, gid_t group) -{ - int ret = sys_chown(path, owner, group); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int chroot(const char *path) -{ - int ret = sys_chroot(path); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int close(int fd) -{ - int ret = sys_close(fd); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int dup(int fd) -{ - int ret = sys_dup(fd); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int dup2(int old, int new) -{ - int ret = sys_dup2(old, new); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -#ifdef __NR_dup3 -static __attribute__((unused)) -int dup3(int old, int new, int flags) -{ - int ret = sys_dup3(old, new, flags); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} -#endif - -static __attribute__((unused)) -int execve(const char *filename, char *const argv[], char *const envp[]) -{ - int ret = sys_execve(filename, argv, envp); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t fork(void) -{ - pid_t ret = sys_fork(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int fsync(int fd) -{ - int ret = sys_fsync(fd); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int getdents64(int fd, struct linux_dirent64 *dirp, int count) -{ - int ret = sys_getdents64(fd, dirp, count); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t getpgid(pid_t pid) -{ - pid_t ret = sys_getpgid(pid); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t getpgrp(void) -{ - pid_t ret = sys_getpgrp(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t getpid(void) -{ - pid_t ret = sys_getpid(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t gettid(void) -{ - pid_t ret = sys_gettid(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - int ret = sys_gettimeofday(tv, tz); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int ioctl(int fd, unsigned long req, void *value) -{ - int ret = sys_ioctl(fd, req, value); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int kill(pid_t pid, int signal) -{ - int ret = sys_kill(pid, signal); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int link(const char *old, const char *new) -{ - int ret = sys_link(old, new); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -off_t lseek(int fd, off_t offset, int whence) -{ - off_t ret = sys_lseek(fd, offset, whence); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int mkdir(const char *path, mode_t mode) -{ - int ret = sys_mkdir(path, mode); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int mknod(const char *path, mode_t mode, dev_t dev) -{ - int ret = sys_mknod(path, mode, dev); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - int ret = sys_mount(src, tgt, fst, flags, data); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int open(const char *path, int flags, mode_t mode) -{ - int ret = sys_open(path, flags, mode); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int pivot_root(const char *new, const char *old) -{ - int ret = sys_pivot_root(new, old); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - int ret = sys_poll(fds, nfds, timeout); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -ssize_t read(int fd, void *buf, size_t count) -{ - ssize_t ret = sys_read(fd, buf, count); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -void *sbrk(intptr_t inc) -{ - void *ret; - - /* first call to find current end */ - if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc)) - return ret + inc; - - SET_ERRNO(ENOMEM); - return (void *)-1; -} - -static __attribute__((unused)) -int sched_yield(void) -{ - int ret = sys_sched_yield(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ - int ret = sys_select(nfds, rfds, wfds, efds, timeout); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int setpgid(pid_t pid, pid_t pgid) -{ - int ret = sys_setpgid(pid, pgid); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t setsid(void) -{ - pid_t ret = sys_setsid(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -unsigned int sleep(unsigned int seconds) -{ - struct timeval my_timeval = { seconds, 0 }; - - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) - return my_timeval.tv_sec + !!my_timeval.tv_usec; - else - return 0; -} - -static __attribute__((unused)) -int msleep(unsigned int msecs) -{ - struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 }; - - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) - return (my_timeval.tv_sec * 1000) + - (my_timeval.tv_usec / 1000) + - !!(my_timeval.tv_usec % 1000); - else - return 0; -} - -static __attribute__((unused)) -int stat(const char *path, struct stat *buf) -{ - int ret = sys_stat(path, buf); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int symlink(const char *old, const char *new) -{ - int ret = sys_symlink(old, new); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int tcsetpgrp(int fd, pid_t pid) -{ - return ioctl(fd, TIOCSPGRP, &pid); -} - -static __attribute__((unused)) -mode_t umask(mode_t mode) -{ - return sys_umask(mode); -} - -static __attribute__((unused)) -int umount2(const char *path, int flags) -{ - int ret = sys_umount2(path, flags); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int unlink(const char *path) -{ - int ret = sys_unlink(path); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - pid_t ret = sys_wait4(pid, status, options, rusage); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - pid_t ret = sys_waitpid(pid, status, options); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - pid_t ret = sys_wait(status); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -ssize_t write(int fd, const void *buf, size_t count) -{ - ssize_t ret = sys_write(fd, buf, count); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -/* some size-optimized reimplementations of a few common str* and mem* - * functions. They're marked static, except memcpy() and raise() which are used - * by libgcc on ARM, so they are marked weak instead in order not to cause an - * error when building a program made of multiple files (not recommended). - */ - -static __attribute__((unused)) -void *memmove(void *dst, const void *src, size_t len) -{ - ssize_t pos = (dst <= src) ? -1 : (long)len; - void *ret = dst; - - while (len--) { - pos += (dst <= src) ? 1 : -1; - ((char *)dst)[pos] = ((char *)src)[pos]; - } - return ret; -} - -static __attribute__((unused)) -void *memset(void *dst, int b, size_t len) -{ - char *p = dst; - - while (len--) - *(p++) = b; - return dst; -} - -static __attribute__((unused)) -int memcmp(const void *s1, const void *s2, size_t n) -{ - size_t ofs = 0; - char c1 = 0; - - while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { - ofs++; - } - return c1; -} - -static __attribute__((unused)) -char *strcpy(char *dst, const char *src) -{ - char *ret = dst; - - while ((*dst++ = *src++)); - return ret; -} - -static __attribute__((unused)) -char *strchr(const char *s, int c) -{ - while (*s) { - if (*s == (char)c) - return (char *)s; - s++; - } - return NULL; -} - -static __attribute__((unused)) -char *strrchr(const char *s, int c) -{ - const char *ret = NULL; - - while (*s) { - if (*s == (char)c) - ret = s; - s++; - } - return (char *)ret; -} - -static __attribute__((unused)) -size_t nolibc_strlen(const char *str) -{ - size_t len; - - for (len = 0; str[len]; len++); - return len; -} - -#define strlen(str) ({ \ - __builtin_constant_p((str)) ? \ - __builtin_strlen((str)) : \ - nolibc_strlen((str)); \ -}) - -static __attribute__((unused)) -int isdigit(int c) -{ - return (unsigned int)(c - '0') <= 9; -} - -static __attribute__((unused)) -long atol(const char *s) -{ - unsigned long ret = 0; - unsigned long d; - int neg = 0; - - if (*s == '-') { - neg = 1; - s++; - } - - while (1) { - d = (*s++) - '0'; - if (d > 9) - break; - ret *= 10; - ret += d; - } - - return neg ? -ret : ret; -} - -static __attribute__((unused)) -int atoi(const char *s) -{ - return atol(s); -} - -static __attribute__((unused)) -const char *ltoa(long in) -{ - /* large enough for -9223372036854775808 */ - static char buffer[21]; - char *pos = buffer + sizeof(buffer) - 1; - int neg = in < 0; - unsigned long n = neg ? -in : in; - - *pos-- = '\0'; - do { - *pos-- = '0' + n % 10; - n /= 10; - if (pos < buffer) - return pos + 1; - } while (n); - - if (neg) - *pos-- = '-'; - return pos + 1; -} - -__attribute__((weak,unused)) -void *memcpy(void *dst, const void *src, size_t len) -{ - return memmove(dst, src, len); -} - -/* needed by libgcc for divide by zero */ -__attribute__((weak,unused)) -int raise(int signal) -{ - return kill(getpid(), signal); -} - -/* Here come a few helper functions */ - -static __attribute__((unused)) -void FD_ZERO(fd_set *set) -{ - memset(set, 0, sizeof(*set)); -} - -static __attribute__((unused)) -void FD_SET(int fd, fd_set *set) -{ - if (fd < 0 || fd >= FD_SETSIZE) - return; - set->fd32[fd / 32] |= 1 << (fd & 31); -} - -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -static __attribute__((unused)) -dev_t makedev(unsigned int major, unsigned int minor) -{ - return ((major & 0xfff) << 8) | (minor & 0xff); -} +#endif /* _NOLIBC_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h new file mode 100644 index 000000000000..ef47e71e2be3 --- /dev/null +++ b/tools/include/nolibc/signal.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * signal function definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_SIGNAL_H +#define _NOLIBC_SIGNAL_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" + +/* This one is not marked static as it's needed by libgcc for divide by zero */ +__attribute__((weak,unused,section(".text.nolibc_raise"))) +int raise(int signal) +{ + return sys_kill(sys_getpid(), signal); +} + +#endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h new file mode 100644 index 000000000000..1747ae125392 --- /dev/null +++ b/tools/include/nolibc/std.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Standard definitions and types for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STD_H +#define _NOLIBC_STD_H + +/* Declare a few quite common macros and types that usually are in stdlib.h, + * stdint.h, ctype.h, unistd.h and a few other common locations. Please place + * integer type definitions and generic macros here, but avoid OS-specific and + * syscall-specific stuff, as this file is expected to be included very early. + */ + +/* note: may already be defined */ +#ifndef NULL +#define NULL ((void *)0) +#endif + +/* stdint types */ +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +typedef unsigned long long uint64_t; +typedef signed long long int64_t; +typedef unsigned long size_t; +typedef signed long ssize_t; +typedef unsigned long uintptr_t; +typedef signed long intptr_t; +typedef signed long ptrdiff_t; + +/* those are commonly provided by sys/types.h */ +typedef unsigned int dev_t; +typedef unsigned long ino_t; +typedef unsigned int mode_t; +typedef signed int pid_t; +typedef unsigned int uid_t; +typedef unsigned int gid_t; +typedef unsigned long nlink_t; +typedef signed long off_t; +typedef signed long blksize_t; +typedef signed long blkcnt_t; +typedef signed long time_t; + +#endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h new file mode 100644 index 000000000000..15dedf8d0902 --- /dev/null +++ b/tools/include/nolibc/stdio.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * minimal stdio function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STDIO_H +#define _NOLIBC_STDIO_H + +#include <stdarg.h> + +#include "std.h" +#include "arch.h" +#include "errno.h" +#include "types.h" +#include "sys.h" +#include "stdlib.h" +#include "string.h" + +#ifndef EOF +#define EOF (-1) +#endif + +/* just define FILE as a non-empty type */ +typedef struct FILE { + char dummy[1]; +} FILE; + +/* We define the 3 common stdio files as constant invalid pointers that + * are easily recognized. + */ +static __attribute__((unused)) FILE* const stdin = (FILE*)-3; +static __attribute__((unused)) FILE* const stdout = (FILE*)-2; +static __attribute__((unused)) FILE* const stderr = (FILE*)-1; + +/* getc(), fgetc(), getchar() */ + +#define getc(stream) fgetc(stream) + +static __attribute__((unused)) +int fgetc(FILE* stream) +{ + unsigned char ch; + int fd; + + if (stream < stdin || stream > stderr) + return EOF; + + fd = 3 + (long)stream; + + if (read(fd, &ch, 1) <= 0) + return EOF; + return ch; +} + +static __attribute__((unused)) +int getchar(void) +{ + return fgetc(stdin); +} + + +/* putc(), fputc(), putchar() */ + +#define putc(c, stream) fputc(c, stream) + +static __attribute__((unused)) +int fputc(int c, FILE* stream) +{ + unsigned char ch = c; + int fd; + + if (stream < stdin || stream > stderr) + return EOF; + + fd = 3 + (long)stream; + + if (write(fd, &ch, 1) <= 0) + return EOF; + return ch; +} + +static __attribute__((unused)) +int putchar(int c) +{ + return fputc(c, stdout); +} + + +/* fwrite(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */ + +/* internal fwrite()-like function which only takes a size and returns 0 on + * success or EOF on error. It automatically retries on short writes. + */ +static __attribute__((unused)) +int _fwrite(const void *buf, size_t size, FILE *stream) +{ + ssize_t ret; + int fd; + + if (stream < stdin || stream > stderr) + return EOF; + + fd = 3 + (long)stream; + + while (size) { + ret = write(fd, buf, size); + if (ret <= 0) + return EOF; + size -= ret; + buf += ret; + } + return 0; +} + +static __attribute__((unused)) +size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream) +{ + size_t written; + + for (written = 0; written < nmemb; written++) { + if (_fwrite(s, size, stream) != 0) + break; + s += size; + } + return written; +} + +static __attribute__((unused)) +int fputs(const char *s, FILE *stream) +{ + return _fwrite(s, strlen(s), stream); +} + +static __attribute__((unused)) +int puts(const char *s) +{ + if (fputs(s, stdout) == EOF) + return EOF; + return putchar('\n'); +} + + +/* fgets() */ +static __attribute__((unused)) +char *fgets(char *s, int size, FILE *stream) +{ + int ofs; + int c; + + for (ofs = 0; ofs + 1 < size;) { + c = fgetc(stream); + if (c == EOF) + break; + s[ofs++] = c; + if (c == '\n') + break; + } + if (ofs < size) + s[ofs] = 0; + return ofs ? s : NULL; +} + + +/* minimal vfprintf(). It supports the following formats: + * - %[l*]{d,u,c,x,p} + * - %s + * - unknown modifiers are ignored. + */ +static __attribute__((unused)) +int vfprintf(FILE *stream, const char *fmt, va_list args) +{ + char escape, lpref, c; + unsigned long long v; + unsigned int written; + size_t len, ofs; + char tmpbuf[21]; + const char *outstr; + + written = ofs = escape = lpref = 0; + while (1) { + c = fmt[ofs++]; + + if (escape) { + /* we're in an escape sequence, ofs == 1 */ + escape = 0; + if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { + char *out = tmpbuf; + + if (c == 'p') + v = va_arg(args, unsigned long); + else if (lpref) { + if (lpref > 1) + v = va_arg(args, unsigned long long); + else + v = va_arg(args, unsigned long); + } else + v = va_arg(args, unsigned int); + + if (c == 'd') { + /* sign-extend the value */ + if (lpref == 0) + v = (long long)(int)v; + else if (lpref == 1) + v = (long long)(long)v; + } + + switch (c) { + case 'c': + out[0] = v; + out[1] = 0; + break; + case 'd': + i64toa_r(v, out); + break; + case 'u': + u64toa_r(v, out); + break; + case 'p': + *(out++) = '0'; + *(out++) = 'x'; + /* fall through */ + default: /* 'x' and 'p' above */ + u64toh_r(v, out); + break; + } + outstr = tmpbuf; + } + else if (c == 's') { + outstr = va_arg(args, char *); + if (!outstr) + outstr="(null)"; + } + else if (c == '%') { + /* queue it verbatim */ + continue; + } + else { + /* modifiers or final 0 */ + if (c == 'l') { + /* long format prefix, maintain the escape */ + lpref++; + } + escape = 1; + goto do_escape; + } + len = strlen(outstr); + goto flush_str; + } + + /* not an escape sequence */ + if (c == 0 || c == '%') { + /* flush pending data on escape or end */ + escape = 1; + lpref = 0; + outstr = fmt; + len = ofs - 1; + flush_str: + if (_fwrite(outstr, len, stream) != 0) + break; + + written += len; + do_escape: + if (c == 0) + break; + fmt += ofs; + ofs = 0; + continue; + } + + /* literal char, just queue it */ + } + return written; +} + +static __attribute__((unused)) +int fprintf(FILE *stream, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vfprintf(stream, fmt, args); + va_end(args); + return ret; +} + +static __attribute__((unused)) +int printf(const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vfprintf(stdout, fmt, args); + va_end(args); + return ret; +} + +static __attribute__((unused)) +void perror(const char *msg) +{ + fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); +} + +#endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h new file mode 100644 index 000000000000..8fd32eaf8037 --- /dev/null +++ b/tools/include/nolibc/stdlib.h @@ -0,0 +1,423 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * stdlib function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STDLIB_H +#define _NOLIBC_STDLIB_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" +#include "string.h" + +struct nolibc_heap { + size_t len; + char user_p[] __attribute__((__aligned__)); +}; + +/* Buffer used to store int-to-ASCII conversions. Will only be implemented if + * any of the related functions is implemented. The area is large enough to + * store "18446744073709551615" or "-9223372036854775808" and the final zero. + */ +static __attribute__((unused)) char itoa_buffer[21]; + +/* + * As much as possible, please keep functions alphabetically sorted. + */ + +/* must be exported, as it's used by libgcc for various divide functions */ +__attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) +void abort(void) +{ + sys_kill(sys_getpid(), SIGABRT); + for (;;); +} + +static __attribute__((unused)) +long atol(const char *s) +{ + unsigned long ret = 0; + unsigned long d; + int neg = 0; + + if (*s == '-') { + neg = 1; + s++; + } + + while (1) { + d = (*s++) - '0'; + if (d > 9) + break; + ret *= 10; + ret += d; + } + + return neg ? -ret : ret; +} + +static __attribute__((unused)) +int atoi(const char *s) +{ + return atol(s); +} + +static __attribute__((unused)) +void free(void *ptr) +{ + struct nolibc_heap *heap; + + if (!ptr) + return; + + heap = container_of(ptr, struct nolibc_heap, user_p); + munmap(heap, heap->len); +} + +/* getenv() tries to find the environment variable named <name> in the + * environment array pointed to by global variable "environ" which must be + * declared as a char **, and must be terminated by a NULL (it is recommended + * to set this variable to the "envp" argument of main()). If the requested + * environment variable exists its value is returned otherwise NULL is + * returned. getenv() is forcefully inlined so that the reference to "environ" + * will be dropped if unused, even at -O0. + */ +static __attribute__((unused)) +char *_getenv(const char *name, char **environ) +{ + int idx, i; + + if (environ) { + for (idx = 0; environ[idx]; idx++) { + for (i = 0; name[i] && name[i] == environ[idx][i];) + i++; + if (!name[i] && environ[idx][i] == '=') + return &environ[idx][i+1]; + } + } + return NULL; +} + +static inline __attribute__((unused,always_inline)) +char *getenv(const char *name) +{ + extern char **environ; + return _getenv(name, environ); +} + +static __attribute__((unused)) +void *malloc(size_t len) +{ + struct nolibc_heap *heap; + + /* Always allocate memory with size multiple of 4096. */ + len = sizeof(*heap) + len; + len = (len + 4095UL) & -4096UL; + heap = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, + -1, 0); + if (__builtin_expect(heap == MAP_FAILED, 0)) + return NULL; + + heap->len = len; + return heap->user_p; +} + +static __attribute__((unused)) +void *calloc(size_t size, size_t nmemb) +{ + void *orig; + size_t res = 0; + + if (__builtin_expect(__builtin_mul_overflow(nmemb, size, &res), 0)) { + SET_ERRNO(ENOMEM); + return NULL; + } + + /* + * No need to zero the heap, the MAP_ANONYMOUS in malloc() + * already does it. + */ + return malloc(res); +} + +static __attribute__((unused)) +void *realloc(void *old_ptr, size_t new_size) +{ + struct nolibc_heap *heap; + size_t user_p_len; + void *ret; + + if (!old_ptr) + return malloc(new_size); + + heap = container_of(old_ptr, struct nolibc_heap, user_p); + user_p_len = heap->len - sizeof(*heap); + /* + * Don't realloc() if @user_p_len >= @new_size, this block of + * memory is still enough to handle the @new_size. Just return + * the same pointer. + */ + if (user_p_len >= new_size) + return old_ptr; + + ret = malloc(new_size); + if (__builtin_expect(!ret, 0)) + return NULL; + + memcpy(ret, heap->user_p, heap->len); + munmap(heap, heap->len); + return ret; +} + +/* Converts the unsigned long integer <in> to its hex representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The + * buffer is filled from the first byte, and the number of characters emitted + * (not counting the trailing zero) is returned. The function is constructed + * in a way to optimize the code size and avoid any divide that could add a + * dependency on large external functions. + */ +static __attribute__((unused)) +int utoh_r(unsigned long in, char *buffer) +{ + signed char pos = (~0UL > 0xfffffffful) ? 60 : 28; + int digits = 0; + int dig; + + do { + dig = in >> pos; + in -= (uint64_t)dig << pos; + pos -= 4; + if (dig || digits || pos < 0) { + if (dig > 9) + dig += 'a' - '0' - 10; + buffer[digits++] = '0' + dig; + } + } while (pos >= 0); + + buffer[digits] = 0; + return digits; +} + +/* converts unsigned long <in> to an hex string using the static itoa_buffer + * and returns the pointer to that string. + */ +static inline __attribute__((unused)) +char *utoh(unsigned long in) +{ + utoh_r(in, itoa_buffer); + return itoa_buffer; +} + +/* Converts the unsigned long integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for + * 4294967295 in 32-bit). The buffer is filled from the first byte, and the + * number of characters emitted (not counting the trailing zero) is returned. + * The function is constructed in a way to optimize the code size and avoid + * any divide that could add a dependency on large external functions. + */ +static __attribute__((unused)) +int utoa_r(unsigned long in, char *buffer) +{ + unsigned long lim; + int digits = 0; + int pos = (~0UL > 0xfffffffful) ? 19 : 9; + int dig; + + do { + for (dig = 0, lim = 1; dig < pos; dig++) + lim *= 10; + + if (digits || in >= lim || !pos) { + for (dig = 0; in >= lim; dig++) + in -= lim; + buffer[digits++] = '0' + dig; + } + } while (pos--); + + buffer[digits] = 0; + return digits; +} + +/* Converts the signed long integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for -9223372036854775808 in 64-bit, 12 for + * -2147483648 in 32-bit). The buffer is filled from the first byte, and the + * number of characters emitted (not counting the trailing zero) is returned. + */ +static __attribute__((unused)) +int itoa_r(long in, char *buffer) +{ + char *ptr = buffer; + int len = 0; + + if (in < 0) { + in = -in; + *(ptr++) = '-'; + len++; + } + len += utoa_r(in, ptr); + return len; +} + +/* for historical compatibility, same as above but returns the pointer to the + * buffer. + */ +static inline __attribute__((unused)) +char *ltoa_r(long in, char *buffer) +{ + itoa_r(in, buffer); + return buffer; +} + +/* converts long integer <in> to a string using the static itoa_buffer and + * returns the pointer to that string. + */ +static inline __attribute__((unused)) +char *itoa(long in) +{ + itoa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* converts long integer <in> to a string using the static itoa_buffer and + * returns the pointer to that string. Same as above, for compatibility. + */ +static inline __attribute__((unused)) +char *ltoa(long in) +{ + itoa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* converts unsigned long integer <in> to a string using the static itoa_buffer + * and returns the pointer to that string. + */ +static inline __attribute__((unused)) +char *utoa(unsigned long in) +{ + utoa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* Converts the unsigned 64-bit integer <in> to its hex representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from + * the first byte, and the number of characters emitted (not counting the + * trailing zero) is returned. The function is constructed in a way to optimize + * the code size and avoid any divide that could add a dependency on large + * external functions. + */ +static __attribute__((unused)) +int u64toh_r(uint64_t in, char *buffer) +{ + signed char pos = 60; + int digits = 0; + int dig; + + do { + if (sizeof(long) >= 8) { + dig = (in >> pos) & 0xF; + } else { + /* 32-bit platforms: avoid a 64-bit shift */ + uint32_t d = (pos >= 32) ? (in >> 32) : in; + dig = (d >> (pos & 31)) & 0xF; + } + if (dig > 9) + dig += 'a' - '0' - 10; + pos -= 4; + if (dig || digits || pos < 0) + buffer[digits++] = '0' + dig; + } while (pos >= 0); + + buffer[digits] = 0; + return digits; +} + +/* converts uint64_t <in> to an hex string using the static itoa_buffer and + * returns the pointer to that string. + */ +static inline __attribute__((unused)) +char *u64toh(uint64_t in) +{ + u64toh_r(in, itoa_buffer); + return itoa_buffer; +} + +/* Converts the unsigned 64-bit integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from + * the first byte, and the number of characters emitted (not counting the + * trailing zero) is returned. The function is constructed in a way to optimize + * the code size and avoid any divide that could add a dependency on large + * external functions. + */ +static __attribute__((unused)) +int u64toa_r(uint64_t in, char *buffer) +{ + unsigned long long lim; + int digits = 0; + int pos = 19; /* start with the highest possible digit */ + int dig; + + do { + for (dig = 0, lim = 1; dig < pos; dig++) + lim *= 10; + + if (digits || in >= lim || !pos) { + for (dig = 0; in >= lim; dig++) + in -= lim; + buffer[digits++] = '0' + dig; + } + } while (pos--); + + buffer[digits] = 0; + return digits; +} + +/* Converts the signed 64-bit integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for -9223372036854775808). The buffer is filled from + * the first byte, and the number of characters emitted (not counting the + * trailing zero) is returned. + */ +static __attribute__((unused)) +int i64toa_r(int64_t in, char *buffer) +{ + char *ptr = buffer; + int len = 0; + + if (in < 0) { + in = -in; + *(ptr++) = '-'; + len++; + } + len += u64toa_r(in, ptr); + return len; +} + +/* converts int64_t <in> to a string using the static itoa_buffer and returns + * the pointer to that string. + */ +static inline __attribute__((unused)) +char *i64toa(int64_t in) +{ + i64toa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* converts uint64_t <in> to a string using the static itoa_buffer and returns + * the pointer to that string. + */ +static inline __attribute__((unused)) +char *u64toa(uint64_t in) +{ + u64toa_r(in, itoa_buffer); + return itoa_buffer; +} + +#endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h new file mode 100644 index 000000000000..bef35bee9c44 --- /dev/null +++ b/tools/include/nolibc/string.h @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * string function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STRING_H +#define _NOLIBC_STRING_H + +#include "std.h" + +static void *malloc(size_t len); + +/* + * As much as possible, please keep functions alphabetically sorted. + */ + +static __attribute__((unused)) +int memcmp(const void *s1, const void *s2, size_t n) +{ + size_t ofs = 0; + char c1 = 0; + + while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { + ofs++; + } + return c1; +} + +static __attribute__((unused)) +void *_nolibc_memcpy_up(void *dst, const void *src, size_t len) +{ + size_t pos = 0; + + while (pos < len) { + ((char *)dst)[pos] = ((const char *)src)[pos]; + pos++; + } + return dst; +} + +static __attribute__((unused)) +void *_nolibc_memcpy_down(void *dst, const void *src, size_t len) +{ + while (len) { + len--; + ((char *)dst)[len] = ((const char *)src)[len]; + } + return dst; +} + +/* might be ignored by the compiler without -ffreestanding, then found as + * missing. + */ +__attribute__((weak,unused,section(".text.nolibc_memmove"))) +void *memmove(void *dst, const void *src, size_t len) +{ + size_t dir, pos; + + pos = len; + dir = -1; + + if (dst < src) { + pos = -1; + dir = 1; + } + + while (len) { + pos += dir; + ((char *)dst)[pos] = ((const char *)src)[pos]; + len--; + } + return dst; +} + +/* must be exported, as it's used by libgcc on ARM */ +__attribute__((weak,unused,section(".text.nolibc_memcpy"))) +void *memcpy(void *dst, const void *src, size_t len) +{ + return _nolibc_memcpy_up(dst, src, len); +} + +/* might be ignored by the compiler without -ffreestanding, then found as + * missing. + */ +__attribute__((weak,unused,section(".text.nolibc_memset"))) +void *memset(void *dst, int b, size_t len) +{ + char *p = dst; + + while (len--) + *(p++) = b; + return dst; +} + +static __attribute__((unused)) +char *strchr(const char *s, int c) +{ + while (*s) { + if (*s == (char)c) + return (char *)s; + s++; + } + return NULL; +} + +static __attribute__((unused)) +int strcmp(const char *a, const char *b) +{ + unsigned int c; + int diff; + + while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c) + ; + return diff; +} + +static __attribute__((unused)) +char *strcpy(char *dst, const char *src) +{ + char *ret = dst; + + while ((*dst++ = *src++)); + return ret; +} + +/* this function is only used with arguments that are not constants or when + * it's not known because optimizations are disabled. + */ +static __attribute__((unused)) +size_t nolibc_strlen(const char *str) +{ + size_t len; + + for (len = 0; str[len]; len++); + return len; +} + +/* do not trust __builtin_constant_p() at -O0, as clang will emit a test and + * the two branches, then will rely on an external definition of strlen(). + */ +#if defined(__OPTIMIZE__) +#define strlen(str) ({ \ + __builtin_constant_p((str)) ? \ + __builtin_strlen((str)) : \ + nolibc_strlen((str)); \ +}) +#else +#define strlen(str) nolibc_strlen((str)) +#endif + +static __attribute__((unused)) +size_t strnlen(const char *str, size_t maxlen) +{ + size_t len; + + for (len = 0; (len < maxlen) && str[len]; len++); + return len; +} + +static __attribute__((unused)) +char *strdup(const char *str) +{ + size_t len; + char *ret; + + len = strlen(str); + ret = malloc(len + 1); + if (__builtin_expect(ret != NULL, 1)) + memcpy(ret, str, len + 1); + + return ret; +} + +static __attribute__((unused)) +char *strndup(const char *str, size_t maxlen) +{ + size_t len; + char *ret; + + len = strnlen(str, maxlen); + ret = malloc(len + 1); + if (__builtin_expect(ret != NULL, 1)) { + memcpy(ret, str, len); + ret[len] = '\0'; + } + + return ret; +} + +static __attribute__((unused)) +size_t strlcat(char *dst, const char *src, size_t size) +{ + size_t len; + char c; + + for (len = 0; dst[len]; len++) + ; + + for (;;) { + c = *src; + if (len < size) + dst[len] = c; + if (!c) + break; + len++; + src++; + } + + return len; +} + +static __attribute__((unused)) +size_t strlcpy(char *dst, const char *src, size_t size) +{ + size_t len; + char c; + + for (len = 0;;) { + c = src[len]; + if (len < size) + dst[len] = c; + if (!c) + break; + len++; + } + return len; +} + +static __attribute__((unused)) +char *strncat(char *dst, const char *src, size_t size) +{ + char *orig = dst; + + while (*dst) + dst++; + + while (size && (*dst = *src)) { + src++; + dst++; + size--; + } + + *dst = 0; + return orig; +} + +static __attribute__((unused)) +int strncmp(const char *a, const char *b, size_t size) +{ + unsigned int c; + int diff = 0; + + while (size-- && + !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c) + ; + + return diff; +} + +static __attribute__((unused)) +char *strncpy(char *dst, const char *src, size_t size) +{ + size_t len; + + for (len = 0; len < size; len++) + if ((dst[len] = *src)) + src++; + return dst; +} + +static __attribute__((unused)) +char *strrchr(const char *s, int c) +{ + const char *ret = NULL; + + while (*s) { + if (*s == (char)c) + ret = s; + s++; + } + return (char *)ret; +} + +#endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h new file mode 100644 index 000000000000..08491070387b --- /dev/null +++ b/tools/include/nolibc/sys.h @@ -0,0 +1,1247 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Syscall definitions for NOLIBC (those in man(2)) + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_SYS_H +#define _NOLIBC_SYS_H + +#include <stdarg.h> +#include "std.h" + +/* system includes */ +#include <asm/unistd.h> +#include <asm/signal.h> // for SIGCHLD +#include <asm/ioctls.h> +#include <asm/mman.h> +#include <linux/fs.h> +#include <linux/loop.h> +#include <linux/time.h> + +#include "arch.h" +#include "errno.h" +#include "types.h" + + +/* Functions in this file only describe syscalls. They're declared static so + * that the compiler usually decides to inline them while still being allowed + * to pass a pointer to one of their instances. Each syscall exists in two + * versions: + * - the "internal" ones, which matches the raw syscall interface at the + * kernel level, which may sometimes slightly differ from the documented + * libc-level ones. For example most of them return either a valid value + * or -errno. All of these are prefixed with "sys_". They may be called + * by non-portable applications if desired. + * + * - the "exported" ones, whose interface must closely match the one + * documented in man(2), that applications are supposed to expect. These + * ones rely on the internal ones, and set errno. + * + * Each syscall will be defined with the two functions, sorted in alphabetical + * order applied to the exported names. + * + * In case of doubt about the relevance of a function here, only those which + * set errno should be defined here. Wrappers like those appearing in man(3) + * should not be placed here. + */ + + +/* + * int brk(void *addr); + * void *sbrk(intptr_t inc) + */ + +static __attribute__((unused)) +void *sys_brk(void *addr) +{ + return (void *)my_syscall1(__NR_brk, addr); +} + +static __attribute__((unused)) +int brk(void *addr) +{ + void *ret = sys_brk(addr); + + if (!ret) { + SET_ERRNO(ENOMEM); + return -1; + } + return 0; +} + +static __attribute__((unused)) +void *sbrk(intptr_t inc) +{ + void *ret; + + /* first call to find current end */ + if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc)) + return ret + inc; + + SET_ERRNO(ENOMEM); + return (void *)-1; +} + + +/* + * int chdir(const char *path); + */ + +static __attribute__((unused)) +int sys_chdir(const char *path) +{ + return my_syscall1(__NR_chdir, path); +} + +static __attribute__((unused)) +int chdir(const char *path) +{ + int ret = sys_chdir(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int chmod(const char *path, mode_t mode); + */ + +static __attribute__((unused)) +int sys_chmod(const char *path, mode_t mode) +{ +#ifdef __NR_fchmodat + return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); +#elif defined(__NR_chmod) + return my_syscall2(__NR_chmod, path, mode); +#else +#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod() +#endif +} + +static __attribute__((unused)) +int chmod(const char *path, mode_t mode) +{ + int ret = sys_chmod(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int chown(const char *path, uid_t owner, gid_t group); + */ + +static __attribute__((unused)) +int sys_chown(const char *path, uid_t owner, gid_t group) +{ +#ifdef __NR_fchownat + return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); +#elif defined(__NR_chown) + return my_syscall3(__NR_chown, path, owner, group); +#else +#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown() +#endif +} + +static __attribute__((unused)) +int chown(const char *path, uid_t owner, gid_t group) +{ + int ret = sys_chown(path, owner, group); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int chroot(const char *path); + */ + +static __attribute__((unused)) +int sys_chroot(const char *path) +{ + return my_syscall1(__NR_chroot, path); +} + +static __attribute__((unused)) +int chroot(const char *path) +{ + int ret = sys_chroot(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int close(int fd); + */ + +static __attribute__((unused)) +int sys_close(int fd) +{ + return my_syscall1(__NR_close, fd); +} + +static __attribute__((unused)) +int close(int fd) +{ + int ret = sys_close(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int dup(int fd); + */ + +static __attribute__((unused)) +int sys_dup(int fd) +{ + return my_syscall1(__NR_dup, fd); +} + +static __attribute__((unused)) +int dup(int fd) +{ + int ret = sys_dup(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int dup2(int old, int new); + */ + +static __attribute__((unused)) +int sys_dup2(int old, int new) +{ +#ifdef __NR_dup3 + return my_syscall3(__NR_dup3, old, new, 0); +#elif defined(__NR_dup2) + return my_syscall2(__NR_dup2, old, new); +#else +#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2() +#endif +} + +static __attribute__((unused)) +int dup2(int old, int new) +{ + int ret = sys_dup2(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int dup3(int old, int new, int flags); + */ + +#ifdef __NR_dup3 +static __attribute__((unused)) +int sys_dup3(int old, int new, int flags) +{ + return my_syscall3(__NR_dup3, old, new, flags); +} + +static __attribute__((unused)) +int dup3(int old, int new, int flags) +{ + int ret = sys_dup3(old, new, flags); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} +#endif + + +/* + * int execve(const char *filename, char *const argv[], char *const envp[]); + */ + +static __attribute__((unused)) +int sys_execve(const char *filename, char *const argv[], char *const envp[]) +{ + return my_syscall3(__NR_execve, filename, argv, envp); +} + +static __attribute__((unused)) +int execve(const char *filename, char *const argv[], char *const envp[]) +{ + int ret = sys_execve(filename, argv, envp); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * void exit(int status); + */ + +static __attribute__((noreturn,unused)) +void sys_exit(int status) +{ + my_syscall1(__NR_exit, status & 255); + while(1); // shut the "noreturn" warnings. +} + +static __attribute__((noreturn,unused)) +void exit(int status) +{ + sys_exit(status); +} + + +/* + * pid_t fork(void); + */ + +static __attribute__((unused)) +pid_t sys_fork(void) +{ +#ifdef __NR_clone + /* note: some archs only have clone() and not fork(). Different archs + * have a different API, but most archs have the flags on first arg and + * will not use the rest with no other flag. + */ + return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); +#elif defined(__NR_fork) + return my_syscall0(__NR_fork); +#else +#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork() +#endif +} + +static __attribute__((unused)) +pid_t fork(void) +{ + pid_t ret = sys_fork(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int fsync(int fd); + */ + +static __attribute__((unused)) +int sys_fsync(int fd) +{ + return my_syscall1(__NR_fsync, fd); +} + +static __attribute__((unused)) +int fsync(int fd) +{ + int ret = sys_fsync(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int getdents64(int fd, struct linux_dirent64 *dirp, int count); + */ + +static __attribute__((unused)) +int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + return my_syscall3(__NR_getdents64, fd, dirp, count); +} + +static __attribute__((unused)) +int getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + int ret = sys_getdents64(fd, dirp, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * pid_t getpgid(pid_t pid); + */ + +static __attribute__((unused)) +pid_t sys_getpgid(pid_t pid) +{ + return my_syscall1(__NR_getpgid, pid); +} + +static __attribute__((unused)) +pid_t getpgid(pid_t pid) +{ + pid_t ret = sys_getpgid(pid); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * pid_t getpgrp(void); + */ + +static __attribute__((unused)) +pid_t sys_getpgrp(void) +{ + return sys_getpgid(0); +} + +static __attribute__((unused)) +pid_t getpgrp(void) +{ + return sys_getpgrp(); +} + + +/* + * pid_t getpid(void); + */ + +static __attribute__((unused)) +pid_t sys_getpid(void) +{ + return my_syscall0(__NR_getpid); +} + +static __attribute__((unused)) +pid_t getpid(void) +{ + return sys_getpid(); +} + + +/* + * pid_t getppid(void); + */ + +static __attribute__((unused)) +pid_t sys_getppid(void) +{ + return my_syscall0(__NR_getppid); +} + +static __attribute__((unused)) +pid_t getppid(void) +{ + return sys_getppid(); +} + + +/* + * pid_t gettid(void); + */ + +static __attribute__((unused)) +pid_t sys_gettid(void) +{ + return my_syscall0(__NR_gettid); +} + +static __attribute__((unused)) +pid_t gettid(void) +{ + return sys_gettid(); +} + + +/* + * int gettimeofday(struct timeval *tv, struct timezone *tz); + */ + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return my_syscall2(__NR_gettimeofday, tv, tz); +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + int ret = sys_gettimeofday(tv, tz); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int ioctl(int fd, unsigned long req, void *value); + */ + +static __attribute__((unused)) +int sys_ioctl(int fd, unsigned long req, void *value) +{ + return my_syscall3(__NR_ioctl, fd, req, value); +} + +static __attribute__((unused)) +int ioctl(int fd, unsigned long req, void *value) +{ + int ret = sys_ioctl(fd, req, value); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +/* + * int kill(pid_t pid, int signal); + */ + +static __attribute__((unused)) +int sys_kill(pid_t pid, int signal) +{ + return my_syscall2(__NR_kill, pid, signal); +} + +static __attribute__((unused)) +int kill(pid_t pid, int signal) +{ + int ret = sys_kill(pid, signal); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int link(const char *old, const char *new); + */ + +static __attribute__((unused)) +int sys_link(const char *old, const char *new) +{ +#ifdef __NR_linkat + return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); +#elif defined(__NR_link) + return my_syscall2(__NR_link, old, new); +#else +#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link() +#endif +} + +static __attribute__((unused)) +int link(const char *old, const char *new) +{ + int ret = sys_link(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * off_t lseek(int fd, off_t offset, int whence); + */ + +static __attribute__((unused)) +off_t sys_lseek(int fd, off_t offset, int whence) +{ + return my_syscall3(__NR_lseek, fd, offset, whence); +} + +static __attribute__((unused)) +off_t lseek(int fd, off_t offset, int whence) +{ + off_t ret = sys_lseek(fd, offset, whence); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int mkdir(const char *path, mode_t mode); + */ + +static __attribute__((unused)) +int sys_mkdir(const char *path, mode_t mode) +{ +#ifdef __NR_mkdirat + return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); +#elif defined(__NR_mkdir) + return my_syscall2(__NR_mkdir, path, mode); +#else +#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir() +#endif +} + +static __attribute__((unused)) +int mkdir(const char *path, mode_t mode) +{ + int ret = sys_mkdir(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int mknod(const char *path, mode_t mode, dev_t dev); + */ + +static __attribute__((unused)) +long sys_mknod(const char *path, mode_t mode, dev_t dev) +{ +#ifdef __NR_mknodat + return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); +#elif defined(__NR_mknod) + return my_syscall3(__NR_mknod, path, mode, dev); +#else +#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod() +#endif +} + +static __attribute__((unused)) +int mknod(const char *path, mode_t mode, dev_t dev) +{ + int ret = sys_mknod(path, mode, dev); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +#ifndef MAP_SHARED +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#endif + +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)-1) +#endif + +static __attribute__((unused)) +void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ +#ifndef my_syscall6 + /* Function not implemented. */ + return -ENOSYS; +#else + + int n; + +#if defined(__i386__) + n = __NR_mmap2; + offset >>= 12; +#else + n = __NR_mmap; +#endif + + return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); +#endif +} + +static __attribute__((unused)) +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + void *ret = sys_mmap(addr, length, prot, flags, fd, offset); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +int sys_munmap(void *addr, size_t length) +{ + return my_syscall2(__NR_munmap, addr, length); +} + +static __attribute__((unused)) +int munmap(void *addr, size_t length) +{ + int ret = sys_munmap(addr, length); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +/* + * int mount(const char *source, const char *target, + * const char *fstype, unsigned long flags, + * const void *data); + */ +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + int ret = sys_mount(src, tgt, fst, flags, data); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int open(const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ +#ifdef __NR_openat + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +#elif defined(__NR_open) + return my_syscall3(__NR_open, path, flags, mode); +#else +#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open() +#endif +} + +static __attribute__((unused)) +int open(const char *path, int flags, ...) +{ + mode_t mode = 0; + int ret; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + ret = sys_open(path, flags, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int pivot_root(const char *new, const char *old); + */ + +static __attribute__((unused)) +int sys_pivot_root(const char *new, const char *old) +{ + return my_syscall2(__NR_pivot_root, new, old); +} + +static __attribute__((unused)) +int pivot_root(const char *new, const char *old) +{ + int ret = sys_pivot_root(new, old); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int poll(struct pollfd *fds, int nfds, int timeout); + */ + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ +#if defined(__NR_ppoll) + struct timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL); +#elif defined(__NR_poll) + return my_syscall3(__NR_poll, fds, nfds, timeout); +#else +#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll() +#endif +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + int ret = sys_poll(fds, nfds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * ssize_t read(int fd, void *buf, size_t count); + */ + +static __attribute__((unused)) +ssize_t sys_read(int fd, void *buf, size_t count) +{ + return my_syscall3(__NR_read, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t read(int fd, void *buf, size_t count) +{ + ssize_t ret = sys_read(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int reboot(int cmd); + * <cmd> is among LINUX_REBOOT_CMD_* + */ + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int sched_yield(void); + */ + +static __attribute__((unused)) +int sys_sched_yield(void) +{ + return my_syscall0(__NR_sched_yield); +} + +static __attribute__((unused)) +int sched_yield(void) +{ + int ret = sys_sched_yield(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int select(int nfds, fd_set *read_fds, fd_set *write_fds, + * fd_set *except_fds, struct timeval *timeout); + */ + +static __attribute__((unused)) +int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) + struct sel_arg_struct { + unsigned long n; + fd_set *r, *w, *e; + struct timeval *t; + } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; + return my_syscall1(__NR_select, &arg); +#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) + struct timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#elif defined(__NR__newselect) || defined(__NR_select) +#ifndef __NR__newselect +#define __NR__newselect __NR_select +#endif + return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#else +#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select() +#endif +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + int ret = sys_select(nfds, rfds, wfds, efds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int setpgid(pid_t pid, pid_t pgid); + */ + +static __attribute__((unused)) +int sys_setpgid(pid_t pid, pid_t pgid) +{ + return my_syscall2(__NR_setpgid, pid, pgid); +} + +static __attribute__((unused)) +int setpgid(pid_t pid, pid_t pgid) +{ + int ret = sys_setpgid(pid, pgid); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * pid_t setsid(void); + */ + +static __attribute__((unused)) +pid_t sys_setsid(void) +{ + return my_syscall0(__NR_setsid); +} + +static __attribute__((unused)) +pid_t setsid(void) +{ + pid_t ret = sys_setsid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int stat(const char *path, struct stat *buf); + * Warning: the struct stat's layout is arch-dependent. + */ + +static __attribute__((unused)) +int sys_stat(const char *path, struct stat *buf) +{ + struct sys_stat_struct stat; + long ret; + +#ifdef __NR_newfstatat + /* only solution for arm64 */ + ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); +#elif defined(__NR_stat) + ret = my_syscall2(__NR_stat, path, &stat); +#else +#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat() +#endif + buf->st_dev = stat.st_dev; + buf->st_ino = stat.st_ino; + buf->st_mode = stat.st_mode; + buf->st_nlink = stat.st_nlink; + buf->st_uid = stat.st_uid; + buf->st_gid = stat.st_gid; + buf->st_rdev = stat.st_rdev; + buf->st_size = stat.st_size; + buf->st_blksize = stat.st_blksize; + buf->st_blocks = stat.st_blocks; + buf->st_atime = stat.st_atime; + buf->st_mtime = stat.st_mtime; + buf->st_ctime = stat.st_ctime; + return ret; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + int ret = sys_stat(path, buf); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int symlink(const char *old, const char *new); + */ + +static __attribute__((unused)) +int sys_symlink(const char *old, const char *new) +{ +#ifdef __NR_symlinkat + return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); +#elif defined(__NR_symlink) + return my_syscall2(__NR_symlink, old, new); +#else +#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink() +#endif +} + +static __attribute__((unused)) +int symlink(const char *old, const char *new) +{ + int ret = sys_symlink(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * mode_t umask(mode_t mode); + */ + +static __attribute__((unused)) +mode_t sys_umask(mode_t mode) +{ + return my_syscall1(__NR_umask, mode); +} + +static __attribute__((unused)) +mode_t umask(mode_t mode) +{ + return sys_umask(mode); +} + + +/* + * int umount2(const char *path, int flags); + */ + +static __attribute__((unused)) +int sys_umount2(const char *path, int flags) +{ + return my_syscall2(__NR_umount2, path, flags); +} + +static __attribute__((unused)) +int umount2(const char *path, int flags) +{ + int ret = sys_umount2(path, flags); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * int unlink(const char *path); + */ + +static __attribute__((unused)) +int sys_unlink(const char *path) +{ +#ifdef __NR_unlinkat + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); +#elif defined(__NR_unlink) + return my_syscall1(__NR_unlink, path); +#else +#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink() +#endif +} + +static __attribute__((unused)) +int unlink(const char *path) +{ + int ret = sys_unlink(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * pid_t wait(int *status); + * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); + * pid_t waitpid(pid_t pid, int *status, int options); + */ + +static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return my_syscall4(__NR_wait4, pid, status, options, rusage); +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + pid_t ret = sys_wait4(-1, status, 0, NULL); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + pid_t ret = sys_wait4(pid, status, options, rusage); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + pid_t ret = sys_wait4(pid, status, options, NULL); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +/* + * ssize_t write(int fd, const void *buf, size_t count); + */ + +static __attribute__((unused)) +ssize_t sys_write(int fd, const void *buf, size_t count) +{ + return my_syscall3(__NR_write, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t write(int fd, const void *buf, size_t count) +{ + ssize_t ret = sys_write(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + + +#endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h new file mode 100644 index 000000000000..d18b7661fdd7 --- /dev/null +++ b/tools/include/nolibc/time.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * time function definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_TIME_H +#define _NOLIBC_TIME_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" + +static __attribute__((unused)) +time_t time(time_t *tptr) +{ + struct timeval tv; + + /* note, cannot fail here */ + sys_gettimeofday(&tv, NULL); + + if (tptr) + *tptr = tv.tv_sec; + return tv.tv_sec; +} + +#endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h new file mode 100644 index 000000000000..959997034e55 --- /dev/null +++ b/tools/include/nolibc/types.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Special types used by various syscalls for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_TYPES_H +#define _NOLIBC_TYPES_H + +#include "std.h" +#include <linux/time.h> + + +/* Only the generic macros and types may be defined here. The arch-specific + * ones such as the O_RDONLY and related macros used by fcntl() and open(), or + * the layout of sys_stat_struct must not be defined here. + */ + +/* stat flags (WARNING, octal here) */ +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFBLK 0060000 +#define S_IFREG 0100000 +#define S_IFIFO 0010000 +#define S_IFLNK 0120000 +#define S_IFSOCK 0140000 +#define S_IFMT 0170000 + +#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) + +/* dirent types */ +#define DT_UNKNOWN 0x0 +#define DT_FIFO 0x1 +#define DT_CHR 0x2 +#define DT_DIR 0x4 +#define DT_BLK 0x6 +#define DT_REG 0x8 +#define DT_LNK 0xa +#define DT_SOCK 0xc + +/* commonly an fd_set represents 256 FDs */ +#ifndef FD_SETSIZE +#define FD_SETSIZE 256 +#endif + +/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different + * values. + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifndef MAXPATHLEN +#define MAXPATHLEN (PATH_MAX) +#endif + +/* Special FD used by all the *at functions */ +#ifndef AT_FDCWD +#define AT_FDCWD (-100) +#endif + +/* whence values for lseek() */ +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +/* cmd for reboot() */ +#define LINUX_REBOOT_MAGIC1 0xfee1dead +#define LINUX_REBOOT_MAGIC2 0x28121969 +#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +#define LINUX_REBOOT_CMD_RESTART 0x01234567 +#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 + +/* Macros used on waitpid()'s return status */ +#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +#define WIFEXITED(status) (((status) & 0x7f) == 0) + +/* waitpid() flags */ +#define WNOHANG 1 + +/* standard exit() codes */ +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 + +/* for select() */ +typedef struct { + uint32_t fd32[(FD_SETSIZE + 31) / 32]; +} fd_set; + +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \ + } while (0) + +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fd32[__fd / 32] |= 1U << (__fd & 31); \ + } while (0) + +#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \ + __r; \ + }) + +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \ + __set->fd32[__idx] = 0; \ + } while (0) + +/* for poll() */ +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 + +struct pollfd { + int fd; + short int events; + short int revents; +}; + +/* for getdents64() */ +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; + +/* needed by wait4() */ +struct rusage { + struct timeval ru_utime; + struct timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +/* The format of the struct as returned by the libc to the application, which + * significantly differs from the format returned by the stat() syscall flavours. + */ +struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + mode_t st_mode; /* protection */ + nlink_t st_nlink; /* number of hard links */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for file system I/O */ + blkcnt_t st_blocks; /* number of 512B blocks allocated */ + time_t st_atime; /* time of last access */ + time_t st_mtime; /* time of last modification */ + time_t st_ctime; /* time of last status change */ +}; + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) +#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) +#define minor(dev) ((unsigned int)(((dev) & 0xff)) + +#ifndef offsetof +#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) +#endif + +#ifndef container_of +#define container_of(PTR, TYPE, FIELD) ({ \ + __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \ + (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \ +}) +#endif + +#endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h new file mode 100644 index 000000000000..1c25e20ee360 --- /dev/null +++ b/tools/include/nolibc/unistd.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * unistd function definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_UNISTD_H +#define _NOLIBC_UNISTD_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" + + +static __attribute__((unused)) +int msleep(unsigned int msecs) +{ + struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return (my_timeval.tv_sec * 1000) + + (my_timeval.tv_usec / 1000) + + !!(my_timeval.tv_usec % 1000); + else + return 0; +} + +static __attribute__((unused)) +unsigned int sleep(unsigned int seconds) +{ + struct timeval my_timeval = { seconds, 0 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return my_timeval.tv_sec + !!my_timeval.tv_usec; + else + return 0; +} + +static __attribute__((unused)) +int usleep(unsigned int usecs) +{ + struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 }; + + return sys_select(0, 0, 0, 0, &my_timeval); +} + +static __attribute__((unused)) +int tcsetpgrp(int fd, pid_t pid) +{ + return ioctl(fd, TIOCSPGRP, &pid); +} + +#endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 39ebf6192016..9fa75943f2ed 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -806,9 +806,9 @@ static int option__cmp(const void *va, const void *vb) static struct option *options__order(const struct option *opts) { - int nr_opts = 0, len; + int nr_opts = 0, nr_group = 0, len; const struct option *o = opts; - struct option *ordered; + struct option *opt, *ordered, *group; for (o = opts; o->type != OPTION_END; o++) ++nr_opts; @@ -819,7 +819,18 @@ static struct option *options__order(const struct option *opts) goto out; memcpy(ordered, opts, len); - qsort(ordered, nr_opts, sizeof(*o), option__cmp); + /* sort each option group individually */ + for (opt = group = ordered; opt->type != OPTION_END; opt++) { + if (opt->type == OPTION_GROUP) { + qsort(group, nr_group, sizeof(*opt), option__cmp); + group = opt + 1; + nr_group = 0; + continue; + } + nr_group++; + } + qsort(group, nr_group, sizeof(*opt), option__cmp); + out: return ordered; } diff --git a/tools/lib/thermal/.gitignore b/tools/lib/thermal/.gitignore new file mode 100644 index 000000000000..5d2aeda80fea --- /dev/null +++ b/tools/lib/thermal/.gitignore @@ -0,0 +1,2 @@ +libthermal.so* +libthermal.pc diff --git a/tools/lib/thermal/Build b/tools/lib/thermal/Build new file mode 100644 index 000000000000..4a892d9e24f9 --- /dev/null +++ b/tools/lib/thermal/Build @@ -0,0 +1,5 @@ +libthermal-y += commands.o +libthermal-y += events.o +libthermal-y += thermal_nl.o +libthermal-y += sampling.o +libthermal-y += thermal.o diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile new file mode 100644 index 000000000000..2d0d255fd0e1 --- /dev/null +++ b/tools/lib/thermal/Makefile @@ -0,0 +1,165 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Most of this file is copied from tools/lib/perf/Makefile + +LIBTHERMAL_VERSION = 0 +LIBTHERMAL_PATCHLEVEL = 0 +LIBTHERMAL_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +include $(srctree)/tools/scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.arch + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +INCLUDES = \ +-I/usr/include/libnl3 \ +-I$(srctree)/tools/lib/thermal/include \ +-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(INCLUDES) +override CFLAGS += -fvisibility=hidden +override CFGLAS += -Wl,-L. +override CFGLAS += -Wl,-lthermal + +all: + +export srctree OUTPUT CC LD CFLAGS V +export DESTDIR DESTDIR_SQ + +include $(srctree)/tools/build/Makefile.include + +VERSION_SCRIPT := libthermal.map + +PATCHLEVEL = $(LIBTHERMAL_PATCHLEVEL) +EXTRAVERSION = $(LIBTHERMAL_EXTRAVERSION) +VERSION = $(LIBTHERMAL_VERSION).$(LIBTHERMAL_PATCHLEVEL).$(LIBTHERMAL_EXTRAVERSION) + +LIBTHERMAL_SO := $(OUTPUT)libthermal.so.$(VERSION) +LIBTHERMAL_A := $(OUTPUT)libthermal.a +LIBTHERMAL_IN := $(OUTPUT)libthermal-in.o +LIBTHERMAL_PC := $(OUTPUT)libthermal.pc +LIBTHERMAL_ALL := $(LIBTHERMAL_A) $(OUTPUT)libthermal.so* + +THERMAL_UAPI := include/uapi/linux/thermal.h + +$(THERMAL_UAPI): FORCE + ln -sf $(srctree)/$@ $(srctree)/tools/$@ + +$(LIBTHERMAL_IN): FORCE + $(Q)$(MAKE) $(build)=libthermal + +$(LIBTHERMAL_A): $(LIBTHERMAL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_IN) + +$(LIBTHERMAL_SO): $(LIBTHERMAL_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal.so \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libthermal.so + @ln -sf $(@F) $(OUTPUT)libthermal.so.$(LIBTHERMAL_VERSION) + + +libs: $(THERMAL_UAPI) $(LIBTHERMAL_A) $(LIBTHERMAL_SO) $(LIBTHERMAL_PC) + +all: fixdep + $(Q)$(MAKE) libs + +clean: + $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) + +$(LIBTHERMAL_PC): + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(VERSION)|" \ + < libthermal.pc.template > $@ + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: libs + $(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \ + +install_pkgconfig: $(LIBTHERMAL_PC) + $(call QUIET_INSTALL, $(LIBTHERMAL_PC)) \ + $(call do_install,$(LIBTHERMAL_PC),$(libdir_SQ)/pkgconfig,644) + +install_doc: + $(Q)$(MAKE) -C Documentation install-man install-html install-examples + +install: install_lib install_headers install_pkgconfig + +FORCE: + +.PHONY: all install clean FORCE diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c new file mode 100644 index 000000000000..73d4d4e8d6ec --- /dev/null +++ b/tools/lib/thermal/commands.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { + /* Thermal zone */ + [THERMAL_GENL_ATTR_TZ] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_TRIP_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TYPE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_HYST] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_MODE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_NAME] = { .type = NLA_STRING }, + + /* Governor(s) */ + [THERMAL_GENL_ATTR_TZ_GOV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_GOV_NAME] = { .type = NLA_STRING }, + + /* Cooling devices */ + [THERMAL_GENL_ATTR_CDEV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_CDEV_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING }, +}; + +static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz) +{ + struct nlattr *attr; + struct thermal_zone *__tz = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_ID) { + + size++; + + __tz = realloc(__tz, sizeof(*__tz) * (size + 2)); + if (!__tz) + return THERMAL_ERROR; + + __tz[size - 1].id = nla_get_u32(attr); + } + + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_NAME) + nla_strlcpy(__tz[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (__tz) + __tz[size].id = -1; + + *tz = __tz; + + return THERMAL_SUCCESS; +} + +static int parse_cdev_get(struct genl_info *info, struct thermal_cdev **cdev) +{ + struct nlattr *attr; + struct thermal_cdev *__cdev = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_CDEV], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_ID) { + + size++; + + __cdev = realloc(__cdev, sizeof(*__cdev) * (size + 2)); + if (!__cdev) + return THERMAL_ERROR; + + __cdev[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_NAME) { + nla_strlcpy(__cdev[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_CUR_STATE) + __cdev[size - 1].cur_state = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_MAX_STATE) + __cdev[size - 1].max_state = nla_get_u32(attr); + } + + if (__cdev) + __cdev[size].id = -1; + + *cdev = __cdev; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_trip(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_trip *__tt = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ_TRIP], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_ID) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TYPE) + __tt[size - 1].type = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TEMP) + __tt[size - 1].temp = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_HYST) + __tt[size - 1].hyst = nla_get_u32(attr); + } + + if (__tt) + __tt[size].id = -1; + + tz->trip = __tt; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_temp(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]) + tz->temp = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]); + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME]) { + nla_strlcpy(tz->governor, + info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME], + THERMAL_NAME_LENGTH); + } + + return THERMAL_SUCCESS; +} + +static int handle_netlink(struct nl_cache_ops *unused, + struct genl_cmd *cmd, + struct genl_info *info, void *arg) +{ + int ret; + + switch (cmd->c_id) { + + case THERMAL_GENL_CMD_TZ_GET_ID: + ret = parse_tz_get(info, arg); + break; + + case THERMAL_GENL_CMD_CDEV_GET: + ret = parse_cdev_get(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TEMP: + ret = parse_tz_get_temp(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TRIP: + ret = parse_tz_get_trip(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_GOV: + ret = parse_tz_get_gov(info, arg); + break; + + default: + return THERMAL_ERROR; + } + + return ret; +} + +static struct genl_cmd thermal_cmds[] = { + { + .c_id = THERMAL_GENL_CMD_TZ_GET_ID, + .c_name = (char *)"List thermal zones", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_GOV, + .c_name = (char *)"Get governor", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TEMP, + .c_name = (char *)"Get thermal zone temperature", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TRIP, + .c_name = (char *)"Get thermal zone trip points", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_CDEV_GET, + .c_name = (char *)"Get cooling devices", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, +}; + +static struct genl_ops thermal_cmd_ops = { + .o_name = (char *)"thermal", + .o_cmds = thermal_cmds, + .o_ncmds = ARRAY_SIZE(thermal_cmds), +}; + +static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd, + int flags, void *arg) +{ + struct nl_msg *msg; + void *hdr; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id, + 0, flags, cmd, THERMAL_GENL_VERSION); + if (!hdr) + return THERMAL_ERROR; + + if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id)) + return THERMAL_ERROR; + + if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg)) + return THERMAL_ERROR; + + nlmsg_free(msg); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz) +{ + return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID, + NLM_F_DUMP | NLM_F_ACK, tz); +} + +thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc) +{ + return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET, + NLM_F_DUMP | NLM_F_ACK, tc); +} + +thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP, + 0, tz); +} + +thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); +} + +thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); +} + +thermal_error_t thermal_cmd_exit(struct thermal_handler *th) +{ + if (genl_unregister_family(&thermal_cmd_ops)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_cmd, th->cb_cmd); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_cmd_init(struct thermal_handler *th) +{ + int ret; + int family; + + if (nl_thermal_connect(&th->sk_cmd, &th->cb_cmd)) + return THERMAL_ERROR; + + ret = genl_register_family(&thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + ret = genl_ops_resolve(th->sk_cmd, &thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + family = genl_ctrl_resolve(th->sk_cmd, "nlctrl"); + if (family != GENL_ID_CTRL) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c new file mode 100644 index 000000000000..a7a55d1a0c4c --- /dev/null +++ b/tools/lib/thermal/events.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <linux/netlink.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + + +#include <thermal.h> +#include "thermal_nl.h" + +/* + * Optimization: fill this array to tell which event we do want to pay + * attention to. That happens at init time with the ops + * structure. Each ops will enable the event and the general handler + * will be able to discard the event if there is not ops associated + * with it. + */ +static int enabled_ops[__THERMAL_GENL_EVENT_MAX]; + +static int handle_thermal_event(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_events_ops *ops = &thp->th->ops->events; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + arg = thp->arg; + + /* + * This is an event we don't care of, bail out. + */ + if (!enabled_ops[genlhdr->cmd]) + return THERMAL_SUCCESS; + + switch (genlhdr->cmd) { + + case THERMAL_GENL_EVENT_TZ_CREATE: + return ops->tz_create(nla_get_string(attrs[THERMAL_GENL_ATTR_TZ_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DELETE: + return ops->tz_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_ENABLE: + return ops->tz_enable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DISABLE: + return ops->tz_disable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_CHANGE: + return ops->trip_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_ADD: + return ops->trip_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DELETE: + return ops->trip_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_UP: + return ops->trip_high(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DOWN: + return ops->trip_low(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_CDEV_ADD: + return ops->cdev_add(nla_get_string(attrs[THERMAL_GENL_ATTR_CDEV_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_MAX_STATE]), arg); + + case THERMAL_GENL_EVENT_CDEV_DELETE: + return ops->cdev_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), arg); + + case THERMAL_GENL_EVENT_CDEV_STATE_UPDATE: + return ops->cdev_update(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_CUR_STATE]), arg); + + case THERMAL_GENL_EVENT_TZ_GOV_CHANGE: + return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg); + default: + return -1; + } +} + +static void thermal_events_ops_init(struct thermal_events_ops *ops) +{ + enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; + enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; + enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; + enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; + enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; + enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; +} + +thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_event, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_event, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_event, th->cb_event); +} + +int thermal_events_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_event); +} + +thermal_error_t thermal_events_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_event, th->cb_event); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_events_init(struct thermal_handler *th) +{ + thermal_events_ops_init(&th->ops->events); + + if (nl_thermal_connect(&th->sk_event, &th->cb_event)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h new file mode 100644 index 000000000000..1abc560602cf --- /dev/null +++ b/tools/lib/thermal/include/thermal.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __LIBTHERMAL_H +#define __LIBTHERMAL_H + +#include <linux/thermal.h> + +#ifndef LIBTHERMAL_API +#define LIBTHERMAL_API __attribute__((visibility("default"))) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct thermal_sampling_ops { + int (*tz_temp)(int tz_id, int temp, void *arg); +}; + +struct thermal_events_ops { + int (*tz_create)(const char *name, int tz_id, void *arg); + int (*tz_delete)(int tz_id, void *arg); + int (*tz_enable)(int tz_id, void *arg); + int (*tz_disable)(int tz_id, void *arg); + int (*trip_high)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_low)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_add)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_change)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_delete)(int tz_id, int trip_id, void *arg); + int (*cdev_add)(const char *name, int cdev_id, int max_state, void *arg); + int (*cdev_delete)(int cdev_id, void *arg); + int (*cdev_update)(int cdev_id, int cur_state, void *arg); + int (*gov_change)(int tz_id, const char *gov_name, void *arg); +}; + +struct thermal_ops { + struct thermal_sampling_ops sampling; + struct thermal_events_ops events; +}; + +struct thermal_trip { + int id; + int type; + int temp; + int hyst; +}; + +struct thermal_zone { + int id; + int temp; + char name[THERMAL_NAME_LENGTH]; + char governor[THERMAL_NAME_LENGTH]; + struct thermal_trip *trip; +}; + +struct thermal_cdev { + int id; + char name[THERMAL_NAME_LENGTH]; + int max_state; + int min_state; + int cur_state; +}; + +typedef enum { + THERMAL_ERROR = -1, + THERMAL_SUCCESS = 0, +} thermal_error_t; + +struct thermal_handler; + +typedef int (*cb_tz_t)(struct thermal_zone *, void *); + +typedef int (*cb_tt_t)(struct thermal_trip *, void *); + +typedef int (*cb_tc_t)(struct thermal_cdev *, void *); + +LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_discover(struct thermal_handler *th); + +LIBTHERMAL_API struct thermal_handler *thermal_init(struct thermal_ops *ops); + +LIBTHERMAL_API void thermal_exit(struct thermal_handler *th); + +/* + * Netlink thermal events + */ +LIBTHERMAL_API thermal_error_t thermal_events_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_events_fd(struct thermal_handler *th); + +/* + * Netlink thermal commands + */ +LIBTHERMAL_API thermal_error_t thermal_cmd_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, + struct thermal_zone **tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, + struct thermal_cdev **tc); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, + struct thermal_zone *tz); + +/* + * Netlink thermal samples + */ +LIBTHERMAL_API thermal_error_t thermal_sampling_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_sampling_fd(struct thermal_handler *th); + +#endif /* __LIBTHERMAL_H */ + +#ifdef __cplusplus +} +#endif diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map new file mode 100644 index 000000000000..d5e77738c7a4 --- /dev/null +++ b/tools/lib/thermal/libthermal.map @@ -0,0 +1,25 @@ +LIBTHERMAL_0.0.1 { + global: + thermal_init; + for_each_thermal_zone; + for_each_thermal_trip; + for_each_thermal_cdev; + thermal_zone_find_by_name; + thermal_zone_find_by_id; + thermal_zone_discover; + thermal_init; + thermal_events_init; + thermal_events_handle; + thermal_events_fd; + thermal_cmd_init; + thermal_cmd_get_tz; + thermal_cmd_get_cdev; + thermal_cmd_get_trip; + thermal_cmd_get_governor; + thermal_cmd_get_temp; + thermal_sampling_init; + thermal_sampling_handle; + thermal_sampling_fd; +local: + *; +}; diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template new file mode 100644 index 000000000000..6f3769731b59 --- /dev/null +++ b/tools/lib/thermal/libthermal.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libthermal +Description: thermal library +Requires: libnl-3.0 libnl-genl-3.0 +Version: @VERSION@ +Libs: -L${libdir} -lnl-genl-3 -lnl-3 +Cflags: -I${includedir} -I{include}/libnl3 diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c new file mode 100644 index 000000000000..ee818f4e9654 --- /dev/null +++ b/tools/lib/thermal/sampling.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static int handle_thermal_sample(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_handler *th = thp->th; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + switch (genlhdr->cmd) { + + case THERMAL_GENL_SAMPLING_TEMP: + return th->ops->sampling.tz_temp( + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + default: + return THERMAL_ERROR; + } +} + +thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_sampling, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_sample, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_sampling, th->cb_sampling); +} + +int thermal_sampling_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_sampling); +} + +thermal_error_t thermal_sampling_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_sampling, th->cb_sampling); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_sampling_init(struct thermal_handler *th) +{ + if (nl_thermal_connect(&th->sk_sampling, &th->cb_sampling)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_SAMPLING_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c new file mode 100644 index 000000000000..72a76dc205bc --- /dev/null +++ b/tools/lib/thermal/thermal.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdio.h> +#include <thermal.h> + +#include "thermal_nl.h" + +int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg) +{ + int i, ret = 0; + + if (!cdev) + return 0; + + for (i = 0; cdev[i].id != -1; i++) + ret |= cb(&cdev[i], arg); + + return ret; +} + +int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg) +{ + int i, ret = 0; + + if (!tt) + return 0; + + for (i = 0; tt[i].id != -1; i++) + ret |= cb(&tt[i], arg); + + return ret; +} + +int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg) +{ + int i, ret = 0; + + if (!tz) + return 0; + + for (i = 0; tz[i].id != -1; i++) + ret |= cb(&tz[i], arg); + + return ret; +} + +struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name) +{ + int i; + + if (!tz || !name) + return NULL; + + for (i = 0; tz[i].id != -1; i++) { + if (!strcmp(tz[i].name, name)) + return &tz[i]; + } + + return NULL; +} + +struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id) +{ + int i; + + if (!tz || id < 0) + return NULL; + + for (i = 0; tz[i].id != -1; i++) { + if (tz[i].id == id) + return &tz[i]; + } + + return NULL; +} + +static int __thermal_zone_discover(struct thermal_zone *tz, void *th) +{ + if (thermal_cmd_get_trip(th, tz) < 0) + return -1; + + if (thermal_cmd_get_governor(th, tz)) + return -1; + + return 0; +} + +struct thermal_zone *thermal_zone_discover(struct thermal_handler *th) +{ + struct thermal_zone *tz; + + if (thermal_cmd_get_tz(th, &tz) < 0) + return NULL; + + if (for_each_thermal_zone(tz, __thermal_zone_discover, th)) + return NULL; + + return tz; +} + +void thermal_exit(struct thermal_handler *th) +{ + thermal_cmd_exit(th); + thermal_events_exit(th); + thermal_sampling_exit(th); + + free(th); +} + +struct thermal_handler *thermal_init(struct thermal_ops *ops) +{ + struct thermal_handler *th; + + th = malloc(sizeof(*th)); + if (!th) + return NULL; + th->ops = ops; + + if (thermal_events_init(th)) + goto out_free; + + if (thermal_sampling_init(th)) + goto out_free; + + if (thermal_cmd_init(th)) + goto out_free; + + return th; + +out_free: + free(th); + + return NULL; +} diff --git a/tools/lib/thermal/thermal_nl.c b/tools/lib/thermal/thermal_nl.c new file mode 100644 index 000000000000..b05cf9569858 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +struct handler_args { + const char *group; + int id; +}; + +static __thread int err; +static __thread int done; + +static int nl_seq_check_handler(struct nl_msg *msg, void *arg) +{ + return NL_OK; +} + +static int nl_error_handler(struct sockaddr_nl *nla, struct nlmsgerr *nl_err, + void *arg) +{ + int *ret = arg; + + if (ret) + *ret = nl_err->error; + + return NL_STOP; +} + +static int nl_finish_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +static int nl_ack_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +int nl_send_msg(struct nl_sock *sock, struct nl_cb *cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), void *data) +{ + if (!rx_handler) + return THERMAL_ERROR; + + err = nl_send_auto_complete(sock, msg); + if (err < 0) + return err; + + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, rx_handler, data); + + err = done = 0; + + while (err == 0 && done == 0) + nl_recvmsgs(sock, cb); + + return err; +} + +static int nl_family_handler(struct nl_msg *msg, void *arg) +{ + struct handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return THERMAL_ERROR; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, + nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + + break; + } + + return THERMAL_SUCCESS; +} + +static int nl_get_multicast_id(struct nl_sock *sock, struct nl_cb *cb, + const char *family, const char *group) +{ + struct nl_msg *msg; + int ret = 0, ctrlid; + struct handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + nla_put_string(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_msg(sock, cb, msg, nl_family_handler, &grp); + if (ret) + goto nla_put_failure; + + ret = grp.id; + +nla_put_failure: + nlmsg_free(msg); + return ret; +} + +int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb) +{ + struct nl_cb *cb; + struct nl_sock *sock; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) + return THERMAL_ERROR; + + sock = nl_socket_alloc(); + if (!sock) + goto out_cb_free; + + if (genl_connect(sock)) + goto out_socket_free; + + if (nl_cb_err(cb, NL_CB_CUSTOM, nl_error_handler, &err) || + nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, nl_finish_handler, &done) || + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, nl_ack_handler, &done) || + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check_handler, &done)) + return THERMAL_ERROR; + + *nl_sock = sock; + *nl_cb = cb; + + return THERMAL_SUCCESS; + +out_socket_free: + nl_socket_free(sock); +out_cb_free: + nl_cb_put(cb); + return THERMAL_ERROR; +} + +void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb) +{ + nl_close(nl_sock); + nl_socket_free(nl_sock); + nl_cb_put(nl_cb); +} + +int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_drop_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} + +int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_add_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal_nl.h b/tools/lib/thermal/thermal_nl.h new file mode 100644 index 000000000000..ddf635642f07 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_H +#define __THERMAL_H + +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/mngt.h> +#include <netlink/genl/ctrl.h> + +struct thermal_handler { + int done; + int error; + struct thermal_ops *ops; + struct nl_msg *msg; + struct nl_sock *sk_event; + struct nl_sock *sk_sampling; + struct nl_sock *sk_cmd; + struct nl_cb *cb_cmd; + struct nl_cb *cb_event; + struct nl_cb *cb_sampling; +}; + +struct thermal_handler_param { + struct thermal_handler *th; + void *arg; +}; + +/* + * Low level netlink + */ +extern int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group); + +extern int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group); + +extern int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb); + +extern void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb); + +extern int nl_send_msg(struct nl_sock *sock, struct nl_cb *nl_cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), + void *data); + +#endif /* __THERMAL_H */ diff --git a/tools/memory-model/README b/tools/memory-model/README index 9edd402704c4..dab38904206a 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -54,7 +54,8 @@ klitmus7 Compatibility Table -- 4.14 7.48 -- 4.15 -- 4.19 7.49 -- 4.20 -- 5.5 7.54 -- - 5.6 -- 7.56 -- + 5.6 -- 5.16 7.56 -- + 5.17 -- 7.56.1 -- ============ ========== diff --git a/tools/objtool/Build b/tools/objtool/Build index b7222d5cc7bc..33f2ee5a46d3 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -2,17 +2,15 @@ objtool-y += arch/$(SRCARCH)/ objtool-y += weak.o -objtool-$(SUBCMD_CHECK) += check.o -objtool-$(SUBCMD_CHECK) += special.o -objtool-$(SUBCMD_ORC) += check.o -objtool-$(SUBCMD_ORC) += orc_gen.o -objtool-$(SUBCMD_ORC) += orc_dump.o - +objtool-y += check.o +objtool-y += special.o objtool-y += builtin-check.o -objtool-y += builtin-orc.o objtool-y += elf.o objtool-y += objtool.o +objtool-$(BUILD_ORC) += orc_gen.o +objtool-$(BUILD_ORC) += orc_dump.o + objtool-y += libstring.o objtool-y += libctype.o objtool-y += str_error_r.o diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/objtool.txt index 30f38fdc0d56..8a671902a187 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/objtool.txt @@ -1,15 +1,103 @@ -Compile-time stack metadata validation -====================================== +Objtool +======= +The kernel CONFIG_OBJTOOL option enables a host tool named 'objtool' +which runs at compile time. It can do various validations and +transformations on .o files. -Overview +Objtool has become an integral part of the x86-64 kernel toolchain. The +kernel depends on it for a variety of security and performance features +(and other types of features as well). + + +Features -------- -The kernel CONFIG_STACK_VALIDATION option enables a host tool named -objtool which runs at compile time. It has a "check" subcommand which -analyzes every .o file and ensures the validity of its stack metadata. -It enforces a set of rules on asm code and C inline assembly code so -that stack traces can be reliable. +Objtool has the following features: + +- Stack unwinding metadata validation -- useful for helping to ensure + stack traces are reliable for live patching + +- ORC unwinder metadata generation -- a faster and more precise + alternative to frame pointer based unwinding + +- Retpoline validation -- ensures that all indirect calls go through + retpoline thunks, for Spectre v2 mitigations + +- Retpoline call site annotation -- annotates all retpoline thunk call + sites, enabling the kernel to patch them inline, to prevent "thunk + funneling" for both security and performance reasons + +- Non-instrumentation validation -- validates non-instrumentable + ("noinstr") code rules, preventing instrumentation in low-level C + entry code + +- Static call annotation -- annotates static call sites, enabling the + kernel to implement inline static calls, a faster alternative to some + indirect branches + +- Uaccess validation -- validates uaccess rules for a proper + implementation of Supervisor Mode Access Protection (SMAP) + +- Straight Line Speculation validation -- validates certain SLS + mitigations + +- Indirect Branch Tracking validation -- validates Intel CET IBT rules + to ensure that all functions referenced by function pointers have + corresponding ENDBR instructions + +- Indirect Branch Tracking annotation -- annotates unused ENDBR + instruction sites, enabling the kernel to "seal" them (replace them + with NOPs) to further harden IBT + +- Function entry annotation -- annotates function entries, enabling + kernel function tracing + +- Other toolchain hacks which will go unmentioned at this time... + +Each feature can be enabled individually or in combination using the +objtool cmdline. + + +Objects +------- + +Typically, objtool runs on every translation unit (TU, aka ".o file") in +the kernel. If a TU is part of a kernel module, the '--module' option +is added. + +However: + +- If noinstr validation is enabled, it also runs on vmlinux.o, with all + options removed and '--noinstr' added. + +- If IBT or LTO is enabled, it doesn't run on TUs at all. Instead it + runs on vmlinux.o and linked modules, with all options. + +In summary: + + A) Legacy mode: + TU: objtool [--module] <options> + vmlinux: N/A + module: N/A + + B) CONFIG_NOINSTR_VALIDATION=y && !(CONFIG_X86_KERNEL_IBT=y || CONFIG_LTO=y): + TU: objtool [--module] <options> // no --noinstr + vmlinux: objtool --noinstr // other options removed + module: N/A + + C) CONFIG_X86_KERNEL_IBT=y || CONFIG_LTO=y: + TU: N/A + vmlinux: objtool --noinstr <options> + module: objtool --module --noinstr <options> + + +Stack validation +---------------- + +Objtool's stack validation feature analyzes every .o file and ensures +the validity of its stack metadata. It enforces a set of rules on asm +code and C inline assembly code so that stack traces can be reliable. For each function, it recursively follows all possible code paths and validates the correct frame pointer state at each instruction. @@ -20,14 +108,6 @@ alternative execution paths to a given instruction (or set of instructions). Similarly, it knows how to follow switch statements, for which gcc sometimes uses jump tables. -(Objtool also has an 'orc generate' subcommand which generates debuginfo -for the ORC unwinder. See Documentation/x86/orc-unwinder.rst in the -kernel tree for more details.) - - -Why do we need stack metadata validation? ------------------------------------------ - Here are some of the benefits of validating stack metadata: a) More reliable stack traces for frame pointer enabled kernels @@ -113,9 +193,6 @@ c) Higher live patching compatibility rate For more details, see the livepatch documentation in the Linux kernel source tree at Documentation/livepatch/livepatch.rst. -Rules ------ - To achieve the validation, objtool enforces the following rules: 1. Each callable function must be annotated as such with the ELF @@ -177,7 +254,8 @@ Another possible cause for errors in C code is if the Makefile removes -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. Here are some examples of common warnings reported by objtool, what -they mean, and suggestions for how to fix them. +they mean, and suggestions for how to fix them. When in doubt, ping +the objtool maintainers. 1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup @@ -358,3 +436,7 @@ ignore it: OBJECT_FILES_NON_STANDARD := y to the Makefile. + +NOTE: OBJECT_FILES_NON_STANDARD doesn't work for link time validation of +vmlinux.o or a linked module. So it should only be used for files which +aren't linked into vmlinux or a module. diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 0dbd397f319d..e66d717c245d 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -39,15 +39,13 @@ CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED) AWK = awk -SUBCMD_CHECK := n -SUBCMD_ORC := n +BUILD_ORC := n ifeq ($(SRCARCH),x86) - SUBCMD_CHECK := y - SUBCMD_ORC := y + BUILD_ORC := y endif -export SUBCMD_CHECK SUBCMD_ORC +export BUILD_ORC export srctree OUTPUT CFLAGS SRCARCH AWK include $(srctree)/tools/build/Makefile.include @@ -65,7 +63,7 @@ $(LIBSUBCMD): fixdep FORCE clean: $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete - $(Q)$(RM) $(OUTPUT)arch/x86/inat-tables.c $(OUTPUT)fixdep + $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep $(LIBSUBCMD) FORCE: diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 943cb41cddf7..8b990a52aada 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -581,7 +581,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; case 0xc7: /* mov imm, r/m */ - if (!noinstr) + if (!opts.noinstr) break; if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) { diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index e707d9bcd161..7c97b7391279 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -20,7 +20,7 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt) * find paths that see the STAC but take the NOP instead of * CLAC and the other way around. */ - if (uaccess) + if (opts.uaccess) alt->skip_orig = true; else alt->skip_alt = true; diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index fc6975ab8b06..f4c3a5091737 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -3,28 +3,21 @@ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> */ -/* - * objtool check: - * - * This command analyzes every .o file and ensures the validity of its stack - * trace metadata. It enforces a set of rules on asm code and C inline - * assembly code so that stack traces can be reliable. - * - * For more information, see tools/objtool/Documentation/stack-validation.txt. - */ - #include <subcmd/parse-options.h> #include <string.h> #include <stdlib.h> #include <objtool/builtin.h> #include <objtool/objtool.h> -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - lto, vmlinux, mcount, noinstr, backup, sls, dryrun, - ibt; +#define ERROR(format, ...) \ + fprintf(stderr, \ + "error: objtool: " format "\n", \ + ##__VA_ARGS__) + +struct opts opts; static const char * const check_usage[] = { - "objtool check [<options>] file.o", + "objtool <actions> [<options>] file.o", NULL, }; @@ -33,22 +26,64 @@ static const char * const env_usage[] = { NULL, }; +static int parse_dump(const struct option *opt, const char *str, int unset) +{ + if (!str || !strcmp(str, "orc")) { + opts.dump_orc = true; + return 0; + } + + return -1; +} + +static int parse_hacks(const struct option *opt, const char *str, int unset) +{ + bool found = false; + + /* + * Use strstr() as a lazy method of checking for comma-separated + * options. + * + * No string provided == enable all options. + */ + + if (!str || strstr(str, "jump_label")) { + opts.hack_jump_label = true; + found = true; + } + + if (!str || strstr(str, "noinstr")) { + opts.hack_noinstr = true; + found = true; + } + + return found ? 0 : -1; +} + const struct option check_options[] = { - OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), - OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), - OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), - OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), - OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), - OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), - OPT_BOOLEAN('s', "stats", &stats, "print statistics"), - OPT_BOOLEAN(0, "lto", <o, "whole-archive like runs"), - OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"), - OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), - OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), - OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), - OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), - OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"), - OPT_BOOLEAN(0, "ibt", &ibt, "validate ENDBR placement"), + OPT_GROUP("Actions:"), + OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks), + OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"), + OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"), + OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"), + OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"), + OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), + OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), + OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), + OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), + OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"), + OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump), + + OPT_GROUP("Options:"), + OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"), + OPT_BOOLEAN(0, "backup", &opts.backup, "create .orig files before modification"), + OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"), + OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"), + OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"), + OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"), + OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"), + OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"), + OPT_END(), }; @@ -79,7 +114,59 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[]) return argc; } -int cmd_check(int argc, const char **argv) +static bool opts_valid(void) +{ + if (opts.hack_jump_label || + opts.hack_noinstr || + opts.ibt || + opts.mcount || + opts.noinstr || + opts.orc || + opts.retpoline || + opts.sls || + opts.stackval || + opts.static_call || + opts.uaccess) { + if (opts.dump_orc) { + ERROR("--dump can't be combined with other options"); + return false; + } + + return true; + } + + if (opts.dump_orc) + return true; + + ERROR("At least one command required"); + return false; +} + +static bool link_opts_valid(struct objtool_file *file) +{ + if (opts.link) + return true; + + if (has_multiple_files(file->elf)) { + ERROR("Linked object detected, forcing --link"); + opts.link = true; + return true; + } + + if (opts.noinstr) { + ERROR("--noinstr requires --link"); + return false; + } + + if (opts.ibt) { + ERROR("--ibt requires --link"); + return false; + } + + return true; +} + +int objtool_run(int argc, const char **argv) { const char *objname; struct objtool_file *file; @@ -88,10 +175,19 @@ int cmd_check(int argc, const char **argv) argc = cmd_parse_options(argc, argv, check_usage); objname = argv[0]; + if (!opts_valid()) + return 1; + + if (opts.dump_orc) + return orc_dump(objname); + file = objtool_open_read(objname); if (!file) return 1; + if (!link_opts_valid(file)) + return 1; + ret = check(file); if (ret) return ret; diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c deleted file mode 100644 index 17f8b9307738..000000000000 --- a/tools/objtool/builtin-orc.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> - */ - -/* - * objtool orc: - * - * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip - * sections to it, which is used by the in-kernel ORC unwinder. - * - * This command is a superset of "objtool check". - */ - -#include <string.h> -#include <objtool/builtin.h> -#include <objtool/objtool.h> - -static const char *orc_usage[] = { - "objtool orc generate [<options>] file.o", - "objtool orc dump file.o", - NULL, -}; - -int cmd_orc(int argc, const char **argv) -{ - const char *objname; - - argc--; argv++; - if (argc <= 0) - usage_with_options(orc_usage, check_options); - - if (!strncmp(argv[0], "gen", 3)) { - struct objtool_file *file; - int ret; - - argc = cmd_parse_options(argc, argv, orc_usage); - objname = argv[0]; - - file = objtool_open_read(objname); - if (!file) - return 1; - - ret = check(file); - if (ret) - return ret; - - if (list_empty(&file->insn_list)) - return 0; - - ret = orc_create(file); - if (ret) - return ret; - - if (!file->elf->changed) - return 0; - - return elf_write(file->elf); - } - - if (!strcmp(argv[0], "dump")) { - if (argc != 2) - usage_with_options(orc_usage, check_options); - - objname = argv[1]; - - return orc_dump(objname); - } - - usage_with_options(orc_usage, check_options); - - return 0; -} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ca5b74603008..190b2f6e360a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -5,6 +5,7 @@ #include <string.h> #include <stdlib.h> +#include <inttypes.h> #include <sys/mman.h> #include <arch/elf.h> @@ -263,7 +264,8 @@ static void init_cfi_state(struct cfi_state *cfi) cfi->drap_offset = -1; } -static void init_insn_state(struct insn_state *state, struct section *sec) +static void init_insn_state(struct objtool_file *file, struct insn_state *state, + struct section *sec) { memset(state, 0, sizeof(*state)); init_cfi_state(&state->cfi); @@ -273,7 +275,7 @@ static void init_insn_state(struct insn_state *state, struct section *sec) * not correctly determine insn->call_dest->sec (external symbols do * not have a section). */ - if (vmlinux && noinstr && sec) + if (opts.link && opts.noinstr && sec) state->noinstr = sec->noinstr; } @@ -339,7 +341,7 @@ static void *cfi_hash_alloc(unsigned long size) if (cfi_hash == (void *)-1L) { WARN("mmap fail cfi_hash"); cfi_hash = NULL; - } else if (stats) { + } else if (opts.stats) { printf("cfi_bits: %d\n", cfi_bits); } @@ -434,7 +436,7 @@ static int decode_instructions(struct objtool_file *file) } } - if (stats) + if (opts.stats) printf("nr_insns: %lu\n", nr_insns); return 0; @@ -497,7 +499,7 @@ static int init_pv_ops(struct objtool_file *file) struct symbol *sym; int idx, nr; - if (!noinstr) + if (!opts.noinstr) return 0; file->pv_ops = NULL; @@ -560,12 +562,12 @@ static int add_dead_ends(struct objtool_file *file) else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { - WARN("can't find unreachable insn at %s+0x%lx", + WARN("can't find unreachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } } else { - WARN("can't find unreachable insn at %s+0x%lx", + WARN("can't find unreachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } @@ -595,12 +597,12 @@ reachable: else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { - WARN("can't find reachable insn at %s+0x%lx", + WARN("can't find reachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } } else { - WARN("can't find reachable insn at %s+0x%lx", + WARN("can't find reachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } @@ -668,7 +670,7 @@ static int create_static_call_sections(struct objtool_file *file) key_sym = find_symbol_by_name(file->elf, tmp); if (!key_sym) { - if (!module) { + if (!opts.module) { WARN("static_call: can't find static_call_key symbol: %s", tmp); return -1; } @@ -761,7 +763,7 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) list_for_each_entry(insn, &file->endbr_list, call_node) idx++; - if (stats) { + if (opts.stats) { printf("ibt: ENDBR at function start: %d\n", file->nr_endbr); printf("ibt: ENDBR inside functions: %d\n", file->nr_endbr_int); printf("ibt: superfluous ENDBR: %d\n", idx); @@ -1028,7 +1030,7 @@ static void add_uaccess_safe(struct objtool_file *file) struct symbol *func; const char **name; - if (!uaccess) + if (!opts.uaccess) return; for (name = uaccess_safe_builtin; *name; name++) { @@ -1144,7 +1146,7 @@ static void annotate_call_site(struct objtool_file *file, * attribute so they need a little help, NOP out any such calls from * noinstr text. */ - if (insn->sec->noinstr && sym->profiling_func) { + if (opts.hack_noinstr && insn->sec->noinstr && sym->profiling_func) { if (reloc) { reloc->type = R_NONE; elf_write_reloc(file->elf, reloc); @@ -1170,7 +1172,7 @@ static void annotate_call_site(struct objtool_file *file, return; } - if (mcount && sym->fentry) { + if (opts.mcount && sym->fentry) { if (sibling) WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); @@ -1256,7 +1258,7 @@ static bool is_first_func_insn(struct objtool_file *file, struct instruction *in if (insn->offset == insn->func->offset) return true; - if (ibt) { + if (opts.ibt) { struct instruction *prev = prev_insn_same_sym(file, insn); if (prev && prev->type == INSN_ENDBR && @@ -1592,7 +1594,7 @@ static int handle_jump_alt(struct objtool_file *file, return -1; } - if (special_alt->key_addend & 2) { + if (opts.hack_jump_label && special_alt->key_addend & 2) { struct reloc *reloc = insn_reloc(file, orig_insn); if (reloc) { @@ -1699,7 +1701,7 @@ static int add_special_section_alts(struct objtool_file *file) free(special_alt); } - if (stats) { + if (opts.stats) { printf("jl\\\tNOP\tJMP\n"); printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short); printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long); @@ -1945,7 +1947,7 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; - if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); if (sym && sym->bind == STB_GLOBAL && @@ -2806,7 +2808,7 @@ static int update_cfi_state(struct instruction *insn, } /* detect when asm code uses rbp as a scratch register */ - if (!no_fp && insn->func && op->src.reg == CFI_BP && + if (opts.stackval && insn->func && op->src.reg == CFI_BP && cfa->base != CFI_BP) cfi->bp_scratch = true; break; @@ -3182,114 +3184,6 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file, return next_insn_same_sec(file, insn); } -static struct instruction * -validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc) -{ - struct instruction *dest; - struct section *sec; - unsigned long off; - - sec = reloc->sym->sec; - off = reloc->sym->offset; - - if ((reloc->sec->base->sh.sh_flags & SHF_EXECINSTR) && - (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32)) - off += arch_dest_reloc_offset(reloc->addend); - else - off += reloc->addend; - - dest = find_insn(file, sec, off); - if (!dest) - return NULL; - - if (dest->type == INSN_ENDBR) { - if (!list_empty(&dest->call_node)) - list_del_init(&dest->call_node); - - return NULL; - } - - if (reloc->sym->static_call_tramp) - return NULL; - - return dest; -} - -static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset, - struct instruction *dest) -{ - WARN_FUNC("%srelocation to !ENDBR: %s", sec, offset, msg, - offstr(dest->sec, dest->offset)); -} - -static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn, - struct instruction *dest) -{ - if (dest->func && dest->func == insn->func) { - /* - * Anything from->to self is either _THIS_IP_ or IRET-to-self. - * - * There is no sane way to annotate _THIS_IP_ since the compiler treats the - * relocation as a constant and is happy to fold in offsets, skewing any - * annotation we do, leading to vast amounts of false-positives. - * - * There's also compiler generated _THIS_IP_ through KCOV and - * such which we have no hope of annotating. - * - * As such, blanket accept self-references without issue. - */ - return; - } - - if (dest->noendbr) - return; - - warn_noendbr("", insn->sec, insn->offset, dest); -} - -static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn) -{ - struct instruction *dest; - struct reloc *reloc; - - switch (insn->type) { - case INSN_CALL: - case INSN_CALL_DYNAMIC: - case INSN_JUMP_CONDITIONAL: - case INSN_JUMP_UNCONDITIONAL: - case INSN_JUMP_DYNAMIC: - case INSN_JUMP_DYNAMIC_CONDITIONAL: - case INSN_RETURN: - /* - * We're looking for code references setting up indirect code - * flow. As such, ignore direct code flow and the actual - * dynamic branches. - */ - return; - - case INSN_NOP: - /* - * handle_group_alt() will create INSN_NOP instruction that - * don't belong to any section, ignore all NOP since they won't - * carry a (useful) relocation anyway. - */ - return; - - default: - break; - } - - for (reloc = insn_reloc(file, insn); - reloc; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - reloc->offset + 1, - (insn->offset + insn->len) - (reloc->offset + 1))) { - dest = validate_ibt_reloc(file, reloc); - if (dest) - validate_ibt_dest(file, insn, dest); - } -} - /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at @@ -3363,7 +3257,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, ret = validate_branch(file, func, alt->insn, state); if (ret) { - if (backtrace) + if (opts.backtrace) BT_FUNC("(alt)", insn); return ret; } @@ -3379,11 +3273,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: - if (sls && !insn->retpoline_safe && - next_insn && next_insn->type != INSN_TRAP) { - WARN_FUNC("missing int3 after ret", - insn->sec, insn->offset); - } return validate_return(func, insn, &state); case INSN_CALL: @@ -3392,7 +3281,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (ret) return ret; - if (!no_fp && func && !is_fentry_call(insn) && + if (opts.stackval && func && !is_fentry_call(insn) && !has_valid_stack_frame(&state)) { WARN_FUNC("call without frame pointer save/setup", sec, insn->offset); @@ -3415,7 +3304,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, ret = validate_branch(file, func, insn->jump_dest, state); if (ret) { - if (backtrace) + if (opts.backtrace) BT_FUNC("(branch)", insn); return ret; } @@ -3427,13 +3316,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: - if (sls && !insn->retpoline_safe && - next_insn && next_insn->type != INSN_TRAP) { - WARN_FUNC("missing int3 after indirect jump", - insn->sec, insn->offset); - } - - /* fallthrough */ case INSN_JUMP_DYNAMIC_CONDITIONAL: if (is_sibling_call(insn)) { ret = validate_sibling_call(file, insn, &state); @@ -3499,9 +3381,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; } - if (ibt) - validate_ibt_insn(file, insn); - if (insn->dead_end) return 0; @@ -3528,7 +3407,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) if (!file->hints) return 0; - init_insn_state(&state, sec); + init_insn_state(file, &state, sec); if (sec) { insn = find_insn(file, sec, 0); @@ -3541,7 +3420,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { if (insn->hint && !insn->visited && !insn->ignore) { ret = validate_branch(file, insn->func, insn, state); - if (ret && backtrace) + if (ret && opts.backtrace) BT_FUNC("<=== (hint)", insn); warnings += ret; } @@ -3571,7 +3450,7 @@ static int validate_retpoline(struct objtool_file *file) * loaded late, they very much do need retpoline in their * .init.text */ - if (!strcmp(insn->sec->name, ".init.text") && !module) + if (!strcmp(insn->sec->name, ".init.text") && !opts.module) continue; WARN_FUNC("indirect %s found in RETPOLINE build", @@ -3614,14 +3493,14 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return true; /* - * Whole archive runs might encounder dead code from weak symbols. + * Whole archive runs might encounter dead code from weak symbols. * This is where the linker will have dropped the weak symbol in * favour of a regular symbol, but leaves the code in place. * * In this case we'll find a piece of code (whole function) that is not * covered by a !section symbol. Ignore them. */ - if (!insn->func && lto) { + if (opts.link && !insn->func) { int size = find_symbol_hole_containing(insn->sec, insn->offset); unsigned long end = insn->offset + size; @@ -3728,7 +3607,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, state->uaccess = sym->uaccess_safe; ret = validate_branch(file, insn->func, insn, *state); - if (ret && backtrace) + if (ret && opts.backtrace) BT_FUNC("<=== (sym)", insn); return ret; } @@ -3743,7 +3622,7 @@ static int validate_section(struct objtool_file *file, struct section *sec) if (func->type != STT_FUNC) continue; - init_insn_state(&state, sec); + init_insn_state(file, &state, sec); set_func_state(&state.cfi); warnings += validate_symbol(file, sec, func, &state); @@ -3752,7 +3631,7 @@ static int validate_section(struct objtool_file *file, struct section *sec) return warnings; } -static int validate_vmlinux_functions(struct objtool_file *file) +static int validate_noinstr_sections(struct objtool_file *file) { struct section *sec; int warnings = 0; @@ -3787,48 +3666,208 @@ static int validate_functions(struct objtool_file *file) return warnings; } +static void mark_endbr_used(struct instruction *insn) +{ + if (!list_empty(&insn->call_node)) + list_del_init(&insn->call_node); +} + +static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *dest; + struct reloc *reloc; + unsigned long off; + int warnings = 0; + + /* + * Looking for function pointer load relocations. Ignore + * direct/indirect branches: + */ + switch (insn->type) { + case INSN_CALL: + case INSN_CALL_DYNAMIC: + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: + case INSN_JUMP_DYNAMIC: + case INSN_JUMP_DYNAMIC_CONDITIONAL: + case INSN_RETURN: + case INSN_NOP: + return 0; + default: + break; + } + + for (reloc = insn_reloc(file, insn); + reloc; + reloc = find_reloc_by_dest_range(file->elf, insn->sec, + reloc->offset + 1, + (insn->offset + insn->len) - (reloc->offset + 1))) { + + /* + * static_call_update() references the trampoline, which + * doesn't have (or need) ENDBR. Skip warning in that case. + */ + if (reloc->sym->static_call_tramp) + continue; + + off = reloc->sym->offset; + if (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32) + off += arch_dest_reloc_offset(reloc->addend); + else + off += reloc->addend; + + dest = find_insn(file, reloc->sym->sec, off); + if (!dest) + continue; + + if (dest->type == INSN_ENDBR) { + mark_endbr_used(dest); + continue; + } + + if (dest->func && dest->func == insn->func) { + /* + * Anything from->to self is either _THIS_IP_ or + * IRET-to-self. + * + * There is no sane way to annotate _THIS_IP_ since the + * compiler treats the relocation as a constant and is + * happy to fold in offsets, skewing any annotation we + * do, leading to vast amounts of false-positives. + * + * There's also compiler generated _THIS_IP_ through + * KCOV and such which we have no hope of annotating. + * + * As such, blanket accept self-references without + * issue. + */ + continue; + } + + if (dest->noendbr) + continue; + + WARN_FUNC("relocation to !ENDBR: %s", + insn->sec, insn->offset, + offstr(dest->sec, dest->offset)); + + warnings++; + } + + return warnings; +} + +static int validate_ibt_data_reloc(struct objtool_file *file, + struct reloc *reloc) +{ + struct instruction *dest; + + dest = find_insn(file, reloc->sym->sec, + reloc->sym->offset + reloc->addend); + if (!dest) + return 0; + + if (dest->type == INSN_ENDBR) { + mark_endbr_used(dest); + return 0; + } + + if (dest->noendbr) + return 0; + + WARN_FUNC("data relocation to !ENDBR: %s", + reloc->sec->base, reloc->offset, + offstr(dest->sec, dest->offset)); + + return 1; +} + +/* + * Validate IBT rules and remove used ENDBR instructions from the seal list. + * Unused ENDBR instructions will be annotated for sealing (i.e., replaced with + * NOPs) later, in create_ibt_endbr_seal_sections(). + */ static int validate_ibt(struct objtool_file *file) { struct section *sec; struct reloc *reloc; + struct instruction *insn; + int warnings = 0; + + for_each_insn(file, insn) + warnings += validate_ibt_insn(file, insn); for_each_sec(file, sec) { - bool is_data; - /* already done in validate_branch() */ + /* Already done by validate_ibt_insn() */ if (sec->sh.sh_flags & SHF_EXECINSTR) continue; if (!sec->reloc) continue; - if (!strncmp(sec->name, ".orc", 4)) + /* + * These sections can reference text addresses, but not with + * the intent to indirect branch to them. + */ + if (!strncmp(sec->name, ".discard", 8) || + !strncmp(sec->name, ".debug", 6) || + !strcmp(sec->name, ".altinstructions") || + !strcmp(sec->name, ".ibt_endbr_seal") || + !strcmp(sec->name, ".orc_unwind_ip") || + !strcmp(sec->name, ".parainstructions") || + !strcmp(sec->name, ".retpoline_sites") || + !strcmp(sec->name, ".smp_locks") || + !strcmp(sec->name, ".static_call_sites") || + !strcmp(sec->name, "_error_injection_whitelist") || + !strcmp(sec->name, "_kprobe_blacklist") || + !strcmp(sec->name, "__bug_table") || + !strcmp(sec->name, "__ex_table") || + !strcmp(sec->name, "__jump_table") || + !strcmp(sec->name, "__mcount_loc") || + !strcmp(sec->name, "__tracepoints")) continue; - if (!strncmp(sec->name, ".discard", 8)) - continue; + list_for_each_entry(reloc, &sec->reloc->reloc_list, list) + warnings += validate_ibt_data_reloc(file, reloc); + } - if (!strncmp(sec->name, ".debug", 6)) - continue; + return warnings; +} - if (!strcmp(sec->name, "_error_injection_whitelist")) - continue; +static int validate_sls(struct objtool_file *file) +{ + struct instruction *insn, *next_insn; + int warnings = 0; - if (!strcmp(sec->name, "_kprobe_blacklist")) - continue; + for_each_insn(file, insn) { + next_insn = next_insn_same_sec(file, insn); - is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata"); + if (insn->retpoline_safe) + continue; - list_for_each_entry(reloc, &sec->reloc->reloc_list, list) { - struct instruction *dest; + switch (insn->type) { + case INSN_RETURN: + if (!next_insn || next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after ret", + insn->sec, insn->offset); + warnings++; + } - dest = validate_ibt_reloc(file, reloc); - if (is_data && dest && !dest->noendbr) - warn_noendbr("data ", sec, reloc->offset, dest); + break; + case INSN_JUMP_DYNAMIC: + if (!next_insn || next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after indirect jump", + insn->sec, insn->offset); + warnings++; + } + break; + default: + break; } } - return 0; + return warnings; } static int validate_reachable_instructions(struct objtool_file *file) @@ -3853,16 +3892,6 @@ int check(struct objtool_file *file) { int ret, warnings = 0; - if (lto && !(vmlinux || module)) { - fprintf(stderr, "--lto requires: --vmlinux or --module\n"); - return 1; - } - - if (ibt && !lto) { - fprintf(stderr, "--ibt requires: --lto\n"); - return 1; - } - arch_initial_func_cfi_state(&initial_func_cfi); init_cfi_state(&init_cfi); init_cfi_state(&func_cfi); @@ -3883,73 +3912,89 @@ int check(struct objtool_file *file) if (list_empty(&file->insn_list)) goto out; - if (vmlinux && !lto) { - ret = validate_vmlinux_functions(file); + if (opts.retpoline) { + ret = validate_retpoline(file); if (ret < 0) - goto out; - + return ret; warnings += ret; - goto out; } - if (retpoline) { - ret = validate_retpoline(file); + if (opts.stackval || opts.orc || opts.uaccess) { + ret = validate_functions(file); if (ret < 0) - return ret; + goto out; warnings += ret; - } - ret = validate_functions(file); - if (ret < 0) - goto out; - warnings += ret; + ret = validate_unwind_hints(file, NULL); + if (ret < 0) + goto out; + warnings += ret; - ret = validate_unwind_hints(file, NULL); - if (ret < 0) - goto out; - warnings += ret; + if (!warnings) { + ret = validate_reachable_instructions(file); + if (ret < 0) + goto out; + warnings += ret; + } - if (ibt) { + } else if (opts.noinstr) { + ret = validate_noinstr_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + + if (opts.ibt) { ret = validate_ibt(file); if (ret < 0) goto out; warnings += ret; } - if (!warnings) { - ret = validate_reachable_instructions(file); + if (opts.sls) { + ret = validate_sls(file); if (ret < 0) goto out; warnings += ret; } - ret = create_static_call_sections(file); - if (ret < 0) - goto out; - warnings += ret; + if (opts.static_call) { + ret = create_static_call_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } - if (retpoline) { + if (opts.retpoline) { ret = create_retpoline_sites_sections(file); if (ret < 0) goto out; warnings += ret; } - if (mcount) { + if (opts.mcount) { ret = create_mcount_loc_sections(file); if (ret < 0) goto out; warnings += ret; } - if (ibt) { + if (opts.ibt) { ret = create_ibt_endbr_seal_sections(file); if (ret < 0) goto out; warnings += ret; } - if (stats) { + if (opts.orc && !list_empty(&file->insn_list)) { + ret = orc_create(file); + if (ret < 0) + goto out; + warnings += ret; + } + + + if (opts.stats) { printf("nr_insns_visited: %ld\n", nr_insns_visited); printf("nr_cfi: %ld\n", nr_cfi); printf("nr_cfi_reused: %ld\n", nr_cfi_reused); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index ebf2ba5755c1..c25e957c1e52 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -355,7 +355,7 @@ static int read_sections(struct elf *elf) elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name)); } - if (stats) { + if (opts.stats) { printf("nr_sections: %lu\n", (unsigned long)sections_nr); printf("section_bits: %d\n", elf->section_bits); } @@ -374,9 +374,15 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) struct list_head *entry; struct rb_node *pnode; + INIT_LIST_HEAD(&sym->pv_target); + sym->alias = sym; + sym->type = GELF_ST_TYPE(sym->sym.st_info); sym->bind = GELF_ST_BIND(sym->sym.st_info); + if (sym->type == STT_FILE) + elf->num_files++; + sym->offset = sym->sym.st_value; sym->len = sym->sym.st_size; @@ -435,8 +441,6 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); - INIT_LIST_HEAD(&sym->pv_target); - sym->alias = sym; sym->idx = i; @@ -475,7 +479,7 @@ static int read_symbols(struct elf *elf) elf_add_symbol(elf, sym); } - if (stats) { + if (opts.stats) { printf("nr_symbols: %lu\n", (unsigned long)symbols_nr); printf("symbol_bits: %d\n", elf->symbol_bits); } @@ -546,7 +550,7 @@ static struct section *elf_create_reloc_section(struct elf *elf, int reltype); int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, - unsigned int type, struct symbol *sym, long addend) + unsigned int type, struct symbol *sym, s64 addend) { struct reloc *reloc; @@ -600,24 +604,21 @@ static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym) } /* - * Move the first global symbol, as per sh_info, into a new, higher symbol - * index. This fees up the shndx for a new local symbol. + * The libelf API is terrible; gelf_update_sym*() takes a data block relative + * index value, *NOT* the symbol index. As such, iterate the data blocks and + * adjust index until it fits. + * + * If no data block is found, allow adding a new data block provided the index + * is only one past the end. */ -static int elf_move_global_symbol(struct elf *elf, struct section *symtab, - struct section *symtab_shndx) +static int elf_update_symbol(struct elf *elf, struct section *symtab, + struct section *symtab_shndx, struct symbol *sym) { - Elf_Data *data, *shndx_data = NULL; - Elf32_Word first_non_local; - struct symbol *sym; - Elf_Scn *s; - - first_non_local = symtab->sh.sh_info; - - sym = find_symbol_by_index(elf, first_non_local); - if (!sym) { - WARN("no non-local symbols !?"); - return first_non_local; - } + Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF; + Elf_Data *symtab_data = NULL, *shndx_data = NULL; + Elf64_Xword entsize = symtab->sh.sh_entsize; + int max_idx, idx = sym->idx; + Elf_Scn *s, *t = NULL; s = elf_getscn(elf->elf, symtab->idx); if (!s) { @@ -625,79 +626,124 @@ static int elf_move_global_symbol(struct elf *elf, struct section *symtab, return -1; } - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return -1; + if (symtab_shndx) { + t = elf_getscn(elf->elf, symtab_shndx->idx); + if (!t) { + WARN_ELF("elf_getscn"); + return -1; + } } - data->d_buf = &sym->sym; - data->d_size = sizeof(sym->sym); - data->d_align = 1; - data->d_type = ELF_T_SYM; + for (;;) { + /* get next data descriptor for the relevant sections */ + symtab_data = elf_getdata(s, symtab_data); + if (t) + shndx_data = elf_getdata(t, shndx_data); - sym->idx = symtab->sh.sh_size / sizeof(sym->sym); - elf_dirty_reloc_sym(elf, sym); + /* end-of-list */ + if (!symtab_data) { + void *buf; - symtab->sh.sh_info += 1; - symtab->sh.sh_size += data->d_size; - symtab->changed = true; + if (idx) { + /* we don't do holes in symbol tables */ + WARN("index out of range"); + return -1; + } - if (symtab_shndx) { - s = elf_getscn(elf->elf, symtab_shndx->idx); - if (!s) { - WARN_ELF("elf_getscn"); + /* if @idx == 0, it's the next contiguous entry, create it */ + symtab_data = elf_newdata(s); + if (t) + shndx_data = elf_newdata(t); + + buf = calloc(1, entsize); + if (!buf) { + WARN("malloc"); + return -1; + } + + symtab_data->d_buf = buf; + symtab_data->d_size = entsize; + symtab_data->d_align = 1; + symtab_data->d_type = ELF_T_SYM; + + symtab->sh.sh_size += entsize; + symtab->changed = true; + + if (t) { + shndx_data->d_buf = &sym->sec->idx; + shndx_data->d_size = sizeof(Elf32_Word); + shndx_data->d_align = sizeof(Elf32_Word); + shndx_data->d_type = ELF_T_WORD; + + symtab_shndx->sh.sh_size += sizeof(Elf32_Word); + symtab_shndx->changed = true; + } + + break; + } + + /* empty blocks should not happen */ + if (!symtab_data->d_size) { + WARN("zero size data"); return -1; } - shndx_data = elf_newdata(s); + /* is this the right block? */ + max_idx = symtab_data->d_size / entsize; + if (idx < max_idx) + break; + + /* adjust index and try again */ + idx -= max_idx; + } + + /* something went side-ways */ + if (idx < 0) { + WARN("negative index"); + return -1; + } + + /* setup extended section index magic and write the symbol */ + if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { + sym->sym.st_shndx = shndx; + if (!shndx_data) + shndx = 0; + } else { + sym->sym.st_shndx = SHN_XINDEX; if (!shndx_data) { - WARN_ELF("elf_newshndx_data"); + WARN("no .symtab_shndx"); return -1; } + } - shndx_data->d_buf = &sym->sec->idx; - shndx_data->d_size = sizeof(Elf32_Word); - shndx_data->d_align = 4; - shndx_data->d_type = ELF_T_WORD; - - symtab_shndx->sh.sh_size += 4; - symtab_shndx->changed = true; + if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) { + WARN_ELF("gelf_update_symshndx"); + return -1; } - return first_non_local; + return 0; } static struct symbol * elf_create_section_symbol(struct elf *elf, struct section *sec) { struct section *symtab, *symtab_shndx; - Elf_Data *shndx_data = NULL; - struct symbol *sym; - Elf32_Word shndx; + Elf32_Word first_non_local, new_idx; + struct symbol *sym, *old; symtab = find_section_by_name(elf, ".symtab"); if (symtab) { symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); - if (symtab_shndx) - shndx_data = symtab_shndx->data; } else { WARN("no .symtab"); return NULL; } - sym = malloc(sizeof(*sym)); + sym = calloc(1, sizeof(*sym)); if (!sym) { perror("malloc"); return NULL; } - memset(sym, 0, sizeof(*sym)); - - sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx); - if (sym->idx < 0) { - WARN("elf_move_global_symbol"); - return NULL; - } sym->name = sec->name; sym->sec = sec; @@ -707,24 +753,41 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) // st_other 0 // st_value 0 // st_size 0 - shndx = sec->idx; - if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { - sym->sym.st_shndx = shndx; - if (!shndx_data) - shndx = 0; - } else { - sym->sym.st_shndx = SHN_XINDEX; - if (!shndx_data) { - WARN("no .symtab_shndx"); + + /* + * Move the first global symbol, as per sh_info, into a new, higher + * symbol index. This fees up a spot for a new local symbol. + */ + first_non_local = symtab->sh.sh_info; + new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; + old = find_symbol_by_index(elf, first_non_local); + if (old) { + old->idx = new_idx; + + hlist_del(&old->hash); + elf_hash_add(symbol, &old->hash, old->idx); + + elf_dirty_reloc_sym(elf, old); + + if (elf_update_symbol(elf, symtab, symtab_shndx, old)) { + WARN("elf_update_symbol move"); return NULL; } + + new_idx = first_non_local; } - if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) { - WARN_ELF("gelf_update_symshndx"); + sym->idx = new_idx; + if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) { + WARN("elf_update_symbol"); return NULL; } + /* + * Either way, we added a LOCAL symbol. + */ + symtab->sh.sh_info += 1; + elf_add_symbol(elf, sym); return sym; @@ -843,7 +906,7 @@ static int read_relocs(struct elf *elf) tot_reloc += nr_reloc; } - if (stats) { + if (opts.stats) { printf("max_reloc: %lu\n", max_reloc); printf("tot_reloc: %lu\n", tot_reloc); printf("reloc_bits: %d\n", elf->reloc_bits); @@ -1222,7 +1285,7 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; - if (dryrun) + if (opts.dryrun) return 0; /* Update changed relocation sections and section headers: */ diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index c39dbfaef6dc..280ea18b7f2b 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -8,13 +8,37 @@ #include <subcmd/parse-options.h> extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - lto, vmlinux, mcount, noinstr, backup, sls, dryrun, - ibt; + +struct opts { + /* actions: */ + bool dump_orc; + bool hack_jump_label; + bool hack_noinstr; + bool ibt; + bool mcount; + bool noinstr; + bool orc; + bool retpoline; + bool sls; + bool stackval; + bool static_call; + bool uaccess; + + /* options: */ + bool backtrace; + bool backup; + bool dryrun; + bool link; + bool module; + bool no_unreachable; + bool sec_address; + bool stats; +}; + +extern struct opts opts; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); -extern int cmd_check(int argc, const char **argv); -extern int cmd_orc(int argc, const char **argv); +extern int objtool_run(int argc, const char **argv); #endif /* _BUILTIN_H */ diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 9b36802ed86f..adebfbc2b518 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -73,7 +73,7 @@ struct reloc { struct symbol *sym; unsigned long offset; unsigned int type; - long addend; + s64 addend; int idx; bool jump_table_start; }; @@ -86,7 +86,7 @@ struct elf { int fd; bool changed; char *name; - unsigned int text_size; + unsigned int text_size, num_files; struct list_head sections; int symbol_bits; @@ -131,11 +131,21 @@ static inline u32 reloc_hash(struct reloc *reloc) return sec_offset_hash(reloc->sec, reloc->offset); } +/* + * Try to see if it's a whole archive (vmlinux.o or module). + * + * Note this will miss the case where a module only has one source file. + */ +static inline bool has_multiple_files(struct elf *elf) +{ + return elf->num_files > 1; +} + struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, - unsigned int type, struct symbol *sym, long addend); + unsigned int type, struct symbol *sym, s64 addend); int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct section *insn_sec, unsigned long insn_off); diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index 802cfda0a6f6..a3e79ae75f2e 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -11,34 +11,33 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <objtool/builtin.h> #include <objtool/elf.h> extern const char *objname; static inline char *offstr(struct section *sec, unsigned long offset) { - struct symbol *func; - char *name, *str; - unsigned long name_off; + bool is_text = (sec->sh.sh_flags & SHF_EXECINSTR); + struct symbol *sym = NULL; + char *str; + int len; - func = find_func_containing(sec, offset); - if (!func) - func = find_symbol_containing(sec, offset); - if (func) { - name = func->name; - name_off = offset - func->offset; + if (is_text) + sym = find_func_containing(sec, offset); + if (!sym) + sym = find_symbol_containing(sec, offset); + + if (sym) { + str = malloc(strlen(sym->name) + strlen(sec->name) + 40); + len = sprintf(str, "%s+0x%lx", sym->name, offset - sym->offset); + if (opts.sec_address) + sprintf(str+len, " (%s+0x%lx)", sec->name, offset); } else { - name = sec->name; - name_off = offset; + str = malloc(strlen(sec->name) + 20); + sprintf(str, "%s+0x%lx", sec->name, offset); } - str = malloc(strlen(name) + 20); - - if (func) - sprintf(str, "%s()+0x%lx", name, name_off); - else - sprintf(str, "%s+0x%lx", name, name_off); - return str; } diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 843ff3c2f28e..512669ce064c 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -3,16 +3,6 @@ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> */ -/* - * objtool: - * - * The 'check' subcmd analyzes every .o file and ensures the validity of its - * stack trace metadata. It enforces a set of rules on asm code and C inline - * assembly code so that stack traces can be reliable. - * - * For more information, see tools/objtool/Documentation/stack-validation.txt. - */ - #include <stdio.h> #include <stdbool.h> #include <string.h> @@ -26,20 +16,6 @@ #include <objtool/objtool.h> #include <objtool/warn.h> -struct cmd_struct { - const char *name; - int (*fn)(int, const char **); - const char *help; -}; - -static const char objtool_usage_string[] = - "objtool COMMAND [ARGS]"; - -static struct cmd_struct objtool_cmds[] = { - {"check", cmd_check, "Perform stack metadata validation on an object file" }, - {"orc", cmd_orc, "Generate in-place ORC unwind tables for an object file" }, -}; - bool help; const char *objname; @@ -118,7 +94,7 @@ struct objtool_file *objtool_open_read(const char *_objname) if (!file.elf) return NULL; - if (backup && !objtool_create_backup(objname)) { + if (opts.backup && !objtool_create_backup(objname)) { WARN("can't create backup file"); return NULL; } @@ -129,7 +105,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); INIT_LIST_HEAD(&file.endbr_list); - file.ignore_unreachables = no_unreachable; + file.ignore_unreachables = opts.no_unreachable; file.hints = false; return &file; @@ -137,7 +113,7 @@ struct objtool_file *objtool_open_read(const char *_objname) void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) { - if (!noinstr) + if (!opts.noinstr) return; if (!f->pv_ops) { @@ -161,70 +137,6 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) f->pv_ops[idx].clean = false; } -static void cmd_usage(void) -{ - unsigned int i, longest = 0; - - printf("\n usage: %s\n\n", objtool_usage_string); - - for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { - if (longest < strlen(objtool_cmds[i].name)) - longest = strlen(objtool_cmds[i].name); - } - - puts(" Commands:"); - for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { - printf(" %-*s ", longest, objtool_cmds[i].name); - puts(objtool_cmds[i].help); - } - - printf("\n"); - - if (!help) - exit(129); - exit(0); -} - -static void handle_options(int *argc, const char ***argv) -{ - while (*argc > 0) { - const char *cmd = (*argv)[0]; - - if (cmd[0] != '-') - break; - - if (!strcmp(cmd, "--help") || !strcmp(cmd, "-h")) { - help = true; - break; - } else { - fprintf(stderr, "Unknown option: %s\n", cmd); - cmd_usage(); - } - - (*argv)++; - (*argc)--; - } -} - -static void handle_internal_command(int argc, const char **argv) -{ - const char *cmd = argv[0]; - unsigned int i, ret; - - for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) { - struct cmd_struct *p = objtool_cmds+i; - - if (strcmp(p->name, cmd)) - continue; - - ret = p->fn(argc, argv); - - exit(ret); - } - - cmd_usage(); -} - int main(int argc, const char **argv) { static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED"; @@ -233,14 +145,7 @@ int main(int argc, const char **argv) exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED); pager_init(UNUSED); - argv++; - argc--; - handle_options(&argc, &argv); - - if (!argc || help) - cmd_usage(); - - handle_internal_command(argc, argv); + objtool_run(argc, argv); return 0; } diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index 8314e824db4a..d83f607733b0 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -15,17 +15,12 @@ return ENOSYS; \ }) -int __weak check(struct objtool_file *file) -{ - UNSUPPORTED("check subcommand"); -} - int __weak orc_dump(const char *_objname) { - UNSUPPORTED("orc"); + UNSUPPORTED("ORC"); } int __weak orc_create(struct objtool_file *file) { - UNSUPPORTED("orc"); + UNSUPPORTED("ORC"); } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f3bf9297bcc0..1bd64e7404b9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -553,9 +553,16 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf), 1) EXTLIBS += -lbpf $(call detected,CONFIG_LIBBPF_DYNAMIC) + + $(call feature_check,libbpf-btf__load_from_kernel_by_id) + ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1) + CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif + else + CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID endif endif diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 207c56805c55..0ed177991ad0 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -9,6 +9,8 @@ #include "../../../util/perf_regs.h" #include "../../../util/debug.h" #include "../../../util/event.h" +#include "../../../util/pmu.h" +#include "../../../util/pmu-hybrid.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), @@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void) .disabled = 1, .exclude_kernel = 1, }; + struct perf_pmu *pmu; int fd; /* * In an unnamed union, init it here to build on older gcc versions */ attr.sample_period = 1; + if (perf_pmu__has_hybrid()) { + /* + * The same register set is supported among different hybrid PMUs. + * Only check the first available one. + */ + pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list); + attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } + event_attr_init(&attr); fd = sys_perf_event_open(&attr, 0, -1, -1, 0); diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index d5289fa58a4f..20eed1e53f80 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1740,7 +1740,7 @@ static int __bench_numa(const char *name) "GB/sec,", "total-speed", "GB/sec total speed"); if (g->p.show_details >= 2) { - char tname[14 + 2 * 10 + 1]; + char tname[14 + 2 * 11 + 1]; struct thread_data *td; for (p = 0; p < g->p.nr_proc; p++) { for (t = 0; t < g->p.nr_threads; t++) { diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 57b9591f7cbb..17c023823713 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -222,11 +222,11 @@ static int __test__bpf(int idx) ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, bpf_testcase_table[idx].prog_id, - true, NULL); + false, NULL); if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { pr_debug("Unable to get BPF object, %s\n", bpf_testcase_table[idx].msg_compile_fail); - if (idx == 0) + if ((idx == 0) || (ret == TEST_SKIP)) return TEST_SKIP; else return TEST_FAIL; @@ -364,9 +364,11 @@ static int test__bpf_prologue_test(struct test_suite *test __maybe_unused, static struct test_case bpf_tests[] = { #ifdef HAVE_LIBBPF_SUPPORT TEST_CASE("Basic BPF filtering", basic_bpf_test), - TEST_CASE("BPF pinning", bpf_pinning), + TEST_CASE_REASON("BPF pinning", bpf_pinning, + "clang isn't installed or environment missing BPF support"), #ifdef HAVE_BPF_PROLOGUE - TEST_CASE("BPF prologue generation", bpf_prologue_test), + TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, + "clang isn't installed or environment missing BPF support"), #else TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), #endif diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index fac3717d9ba1..d336cda94a11 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -279,6 +279,7 @@ static const char *shell_test__description(char *description, size_t size, { FILE *fp; char filename[PATH_MAX]; + int ch; path__join(filename, sizeof(filename), path, name); fp = fopen(filename, "r"); @@ -286,7 +287,9 @@ static const char *shell_test__description(char *description, size_t size, return NULL; /* Skip shebang */ - while (fgetc(fp) != '\n'); + do { + ch = fgetc(fp); + } while (ch != EOF && ch != '\n'); description = fgets(description, size, fp); fclose(fp); @@ -417,7 +420,8 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width, .priv = &st, }; - if (!perf_test__matches(test_suite.desc, curr, argc, argv)) + if (test_suite.desc == NULL || + !perf_test__matches(test_suite.desc, curr, argc, argv)) continue; st.file = ent->d_name; diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index b30dba455f36..9c9ef33e0b3c 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -5,6 +5,16 @@ set -e for p in $(perf list --raw-dump pmu); do + # In powerpc, skip the events for hv_24x7 and hv_gpci. + # These events needs input values to be filled in for + # core, chip, partition id based on system. + # Example: hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/ + # hv_gpci/event,partition_id=?/ + # Hence skip these events for ppc. + if echo "$p" |grep -Eq 'hv_24x7|hv_gpci' ; then + echo "Skipping: Event '$p' in powerpc" + continue + fi echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ee1e3dcbc0bd..d23a9e322ff5 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -109,6 +109,17 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) && strncmp(session->header.env.arch, "aarch64", 7)) return TEST_SKIP; + /* + * In powerpc pSeries platform, not all the topology information + * are exposed via sysfs. Due to restriction, detail like + * physical_package_id will be set to -1. Hence skip this + * test if physical_package_id returns -1 for cpu from perf_cpu_map. + */ + if (strncmp(session->header.env.arch, "powerpc", 7)) { + if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1) + return TEST_SKIP; + } + TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 94624733af7e..8271ab764eb5 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -22,7 +22,8 @@ #include "record.h" #include "util/synthetic-events.h" -struct btf * __weak btf__load_from_kernel_by_id(__u32 id) +#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID +struct btf *btf__load_from_kernel_by_id(__u32 id) { struct btf *btf; #pragma GCC diagnostic push @@ -32,6 +33,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) return err ? ERR_PTR(err) : btf; } +#endif int __weak bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name __maybe_unused, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f9a320694b85..a7f93f5a1ac8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1151,9 +1151,20 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) struct branch_entry *entries = perf_sample__branch_entries(sample); uint64_t i; - printf("%s: nr:%" PRIu64 "\n", - !callstack ? "... branch stack" : "... branch callstack", - sample->branch_stack->nr); + if (!callstack) { + printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr); + } else { + /* the reason of adding 1 to nr is because after expanding + * branch stack it generates nr + 1 callstack records. e.g., + * B()->C() + * A()->B() + * the final callstack should be: + * C() + * B() + * A() + */ + printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1); + } for (i = 0; i < sample->branch_stack->nr; i++) { struct branch_entry *e = &entries[i]; @@ -1169,8 +1180,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) (unsigned)e->flags.reserved, e->flags.type ? branch_type_name(e->flags.type) : ""); } else { - printf("..... %2"PRIu64": %016" PRIx64 "\n", - i, i > 0 ? e->from : e->to); + if (i == 0) { + printf("..... %2"PRIu64": %016" PRIx64 "\n" + "..... %2"PRIu64": %016" PRIx64 "\n", + i, e->to, i+1, e->from); + } else { + printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from); + } } } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 817a2de264b4..c1af37e11f98 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -472,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config, int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { - struct perf_counts_values count; + struct perf_counts_values count, *ptr; struct perf_record_stat *st = &event->stat; struct evsel *counter; + int cpu_map_idx; count.val = st->val; count.ena = st->ena; @@ -485,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session, pr_err("Failed to resolve counter for stat event.\n"); return -EINVAL; } - - *perf_counts(counter->counts, st->cpu, st->thread) = count; + cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu}); + if (cpu_map_idx == -1) { + pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter)); + return -EINVAL; + } + ptr = perf_counts(counter->counts, cpu_map_idx, st->thread); + if (ptr == NULL) { + pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n", + st->cpu, st->thread, evsel__name(counter)); + return -EINVAL; + } + *ptr = count; counter->supported = true; return 0; } diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index 185b8c588e1d..38f9b9da8170 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -3,7 +3,7 @@ * * Module Name: cmfsize - Common get file size function * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 3c265bc917a1..96fd6cec78e2 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -3,7 +3,7 @@ * * Module Name: getopt * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index ccabdbaae6a4..bd08f36df4a7 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -3,7 +3,7 @@ * * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index edd99274cd12..5107892d054b 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -3,7 +3,7 @@ * * Module Name: osunixdir - Unix directory access interfaces * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index fee0022560d5..6ff4edd8dc3b 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -3,7 +3,7 @@ * * Module Name: osunixmap - Unix OSL for file mappings * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 0861728da562..b3651a04d68c 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -3,7 +3,7 @@ * * Module Name: osunixxf - UNIX OSL interfaces * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index e0ebc1dab1cc..153249c87fd7 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -3,7 +3,7 @@ * * Module Name: acpidump.h - Include file for acpi_dump utility * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 444e3d78bd89..d54dde02b87d 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -3,7 +3,7 @@ * * Module Name: apdump - Dump routines for ACPI tables (acpidump) * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index da0c6e13042b..2d9b45a9b526 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -3,7 +3,7 @@ * * Module Name: apfiles - File-related functions for acpidump utility * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index a4cf6042fcfd..44b23fc53dd9 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -3,7 +3,7 @@ * * Module Name: apmain - Main module for the acpidump utility * - * Copyright (C) 2000 - 2021, Intel Corp. + * Copyright (C) 2000 - 2022, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c index e85676711372..761375062505 100644 --- a/tools/power/x86/intel-speed-select/hfi-events.c +++ b/tools/power/x86/intel-speed-select/hfi-events.c @@ -190,7 +190,7 @@ static int handle_event(struct nl_msg *n, void *arg) struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; int ret; - struct perf_cap perf_cap; + struct perf_cap perf_cap = {0}; ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 060390e88e37..9d35614995ee 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1892,6 +1892,12 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, int ret; int status = *(int *)arg4; + if (status && no_turbo()) { + isst_display_error_info_message(1, "Turbo mode is disabled", 0, 0); + ret = -1; + goto disp_results; + } + ret = isst_get_ctdp_levels(cpu, &pkg_dev); if (ret) { isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index f3e3c94ab9bd..92e139b9c792 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") endif turbostat : turbostat.c -override CFLAGS += -O2 -Wall -I../../../include +override CFLAGS += -O2 -Wall -Wextra -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' override CFLAGS += -D_FILE_OFFSET_BITS=64 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 9b17097bc3d7..1e7d3de55a94 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -292,7 +292,7 @@ starts a new interval. must be run as root. Alternatively, non-root users can be enabled to run turbostat this way: -# setcap cap_sys_rawio=ep ./turbostat +# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat # chmod +r /dev/cpu/*/msr diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bc5ae0872fed..ede31a4287a0 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3,7 +3,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel and AMD processors. * - * Copyright (c) 2021 Intel Corporation. + * Copyright (c) 2022 Intel Corporation. * Len Brown <len.brown@intel.com> */ @@ -37,6 +37,171 @@ #include <asm/unistd.h> #include <stdbool.h> +#define UNUSED(x) (void)(x) + +/* + * This list matches the column headers, except + * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time + * 2. Core and CPU are moved to the end, we can't have strings that contain them + * matching on them for --show and --hide. + */ + +/* + * buffer size used by sscanf() for added column names + * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters + */ +#define NAME_BYTES 20 +#define PATH_BYTES 128 + +enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; +enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; +enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; + +struct msr_counter { + unsigned int msr_num; + char name[NAME_BYTES]; + char path[PATH_BYTES]; + unsigned int width; + enum counter_type type; + enum counter_format format; + struct msr_counter *next; + unsigned int flags; +#define FLAGS_HIDE (1 << 0) +#define FLAGS_SHOW (1 << 1) +#define SYSFS_PERCPU (1 << 1) +}; + +struct msr_counter bic[] = { + { 0x0, "usec", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Package", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Node", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 }, + { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 }, + { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Core", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU", "", 0, 0, 0, NULL, 0 }, + { 0x0, "APIC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Die", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "IPC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 }, +}; + +#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) +#define BIC_USEC (1ULL << 0) +#define BIC_TOD (1ULL << 1) +#define BIC_Package (1ULL << 2) +#define BIC_Node (1ULL << 3) +#define BIC_Avg_MHz (1ULL << 4) +#define BIC_Busy (1ULL << 5) +#define BIC_Bzy_MHz (1ULL << 6) +#define BIC_TSC_MHz (1ULL << 7) +#define BIC_IRQ (1ULL << 8) +#define BIC_SMI (1ULL << 9) +#define BIC_sysfs (1ULL << 10) +#define BIC_CPU_c1 (1ULL << 11) +#define BIC_CPU_c3 (1ULL << 12) +#define BIC_CPU_c6 (1ULL << 13) +#define BIC_CPU_c7 (1ULL << 14) +#define BIC_ThreadC (1ULL << 15) +#define BIC_CoreTmp (1ULL << 16) +#define BIC_CoreCnt (1ULL << 17) +#define BIC_PkgTmp (1ULL << 18) +#define BIC_GFX_rc6 (1ULL << 19) +#define BIC_GFXMHz (1ULL << 20) +#define BIC_Pkgpc2 (1ULL << 21) +#define BIC_Pkgpc3 (1ULL << 22) +#define BIC_Pkgpc6 (1ULL << 23) +#define BIC_Pkgpc7 (1ULL << 24) +#define BIC_Pkgpc8 (1ULL << 25) +#define BIC_Pkgpc9 (1ULL << 26) +#define BIC_Pkgpc10 (1ULL << 27) +#define BIC_CPU_LPI (1ULL << 28) +#define BIC_SYS_LPI (1ULL << 29) +#define BIC_PkgWatt (1ULL << 30) +#define BIC_CorWatt (1ULL << 31) +#define BIC_GFXWatt (1ULL << 32) +#define BIC_PkgCnt (1ULL << 33) +#define BIC_RAMWatt (1ULL << 34) +#define BIC_PKG__ (1ULL << 35) +#define BIC_RAM__ (1ULL << 36) +#define BIC_Pkg_J (1ULL << 37) +#define BIC_Cor_J (1ULL << 38) +#define BIC_GFX_J (1ULL << 39) +#define BIC_RAM_J (1ULL << 40) +#define BIC_Mod_c6 (1ULL << 41) +#define BIC_Totl_c0 (1ULL << 42) +#define BIC_Any_c0 (1ULL << 43) +#define BIC_GFX_c0 (1ULL << 44) +#define BIC_CPUGFX (1ULL << 45) +#define BIC_Core (1ULL << 46) +#define BIC_CPU (1ULL << 47) +#define BIC_APIC (1ULL << 48) +#define BIC_X2APIC (1ULL << 49) +#define BIC_Die (1ULL << 50) +#define BIC_GFXACTMHz (1ULL << 51) +#define BIC_IPC (1ULL << 52) +#define BIC_CORE_THROT_CNT (1ULL << 53) + +#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) +#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) +#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz ) +#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) +#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) + +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) + +unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; + +#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) +#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) +#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) +#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) +#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) + char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; @@ -48,6 +213,7 @@ struct timespec interval_ts = { 5, 0 }; unsigned int model_orig; unsigned int num_iterations; +unsigned int header_iterations; unsigned int debug; unsigned int quiet; unsigned int shown; @@ -159,13 +325,6 @@ int ignore_stdin; #define MAX(a, b) ((a) > (b) ? (a) : (b)) -/* - * buffer size used by sscanf() for added column names - * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters - */ -#define NAME_BYTES 20 -#define PATH_BYTES 128 - int backwards_count; char *progname; @@ -205,6 +364,7 @@ struct core_data { unsigned int core_temp_c; unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ unsigned int core_id; + unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; @@ -255,24 +415,6 @@ struct pkg_data { #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) -enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; -enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; -enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; - -struct msr_counter { - unsigned int msr_num; - char name[NAME_BYTES]; - char path[PATH_BYTES]; - unsigned int width; - enum counter_type type; - enum counter_format format; - struct msr_counter *next; - unsigned int flags; -#define FLAGS_HIDE (1 << 0) -#define FLAGS_SHOW (1 << 1) -#define SYSFS_PERCPU (1 << 1) -}; - /* * The accumulated sum of MSR is defined as a monotonic * increasing MSR, it will be accumulated periodically, @@ -522,8 +664,10 @@ static int perf_instr_count_open(int cpu_num) /* counter for cpu_num, including user + kernel and all processes */ fd = perf_event_open(&pea, -1, cpu_num, -1, 0); - if (fd == -1) - err(-1, "cpu%d: perf instruction counter\n", cpu_num); + if (fd == -1) { + warn("cpu%d: perf instruction counter", cpu_num); + BIC_NOT_PRESENT(BIC_IPC); + } return fd; } @@ -550,143 +694,10 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) return 0; } -/* - * This list matches the column headers, except - * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time - * 2. Core and CPU are moved to the end, we can't have strings that contain them - * matching on them for --show and --hide. - */ -struct msr_counter bic[] = { - { 0x0, "usec" }, - { 0x0, "Time_Of_Day_Seconds" }, - { 0x0, "Package" }, - { 0x0, "Node" }, - { 0x0, "Avg_MHz" }, - { 0x0, "Busy%" }, - { 0x0, "Bzy_MHz" }, - { 0x0, "TSC_MHz" }, - { 0x0, "IRQ" }, - { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL }, - { 0x0, "sysfs" }, - { 0x0, "CPU%c1" }, - { 0x0, "CPU%c3" }, - { 0x0, "CPU%c6" }, - { 0x0, "CPU%c7" }, - { 0x0, "ThreadC" }, - { 0x0, "CoreTmp" }, - { 0x0, "CoreCnt" }, - { 0x0, "PkgTmp" }, - { 0x0, "GFX%rc6" }, - { 0x0, "GFXMHz" }, - { 0x0, "Pkg%pc2" }, - { 0x0, "Pkg%pc3" }, - { 0x0, "Pkg%pc6" }, - { 0x0, "Pkg%pc7" }, - { 0x0, "Pkg%pc8" }, - { 0x0, "Pkg%pc9" }, - { 0x0, "Pk%pc10" }, - { 0x0, "CPU%LPI" }, - { 0x0, "SYS%LPI" }, - { 0x0, "PkgWatt" }, - { 0x0, "CorWatt" }, - { 0x0, "GFXWatt" }, - { 0x0, "PkgCnt" }, - { 0x0, "RAMWatt" }, - { 0x0, "PKG_%" }, - { 0x0, "RAM_%" }, - { 0x0, "Pkg_J" }, - { 0x0, "Cor_J" }, - { 0x0, "GFX_J" }, - { 0x0, "RAM_J" }, - { 0x0, "Mod%c6" }, - { 0x0, "Totl%C0" }, - { 0x0, "Any%C0" }, - { 0x0, "GFX%C0" }, - { 0x0, "CPUGFX%" }, - { 0x0, "Core" }, - { 0x0, "CPU" }, - { 0x0, "APIC" }, - { 0x0, "X2APIC" }, - { 0x0, "Die" }, - { 0x0, "GFXAMHz" }, - { 0x0, "IPC" }, -}; - -#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) -#define BIC_USEC (1ULL << 0) -#define BIC_TOD (1ULL << 1) -#define BIC_Package (1ULL << 2) -#define BIC_Node (1ULL << 3) -#define BIC_Avg_MHz (1ULL << 4) -#define BIC_Busy (1ULL << 5) -#define BIC_Bzy_MHz (1ULL << 6) -#define BIC_TSC_MHz (1ULL << 7) -#define BIC_IRQ (1ULL << 8) -#define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) -#define BIC_CPU_c1 (1ULL << 11) -#define BIC_CPU_c3 (1ULL << 12) -#define BIC_CPU_c6 (1ULL << 13) -#define BIC_CPU_c7 (1ULL << 14) -#define BIC_ThreadC (1ULL << 15) -#define BIC_CoreTmp (1ULL << 16) -#define BIC_CoreCnt (1ULL << 17) -#define BIC_PkgTmp (1ULL << 18) -#define BIC_GFX_rc6 (1ULL << 19) -#define BIC_GFXMHz (1ULL << 20) -#define BIC_Pkgpc2 (1ULL << 21) -#define BIC_Pkgpc3 (1ULL << 22) -#define BIC_Pkgpc6 (1ULL << 23) -#define BIC_Pkgpc7 (1ULL << 24) -#define BIC_Pkgpc8 (1ULL << 25) -#define BIC_Pkgpc9 (1ULL << 26) -#define BIC_Pkgpc10 (1ULL << 27) -#define BIC_CPU_LPI (1ULL << 28) -#define BIC_SYS_LPI (1ULL << 29) -#define BIC_PkgWatt (1ULL << 30) -#define BIC_CorWatt (1ULL << 31) -#define BIC_GFXWatt (1ULL << 32) -#define BIC_PkgCnt (1ULL << 33) -#define BIC_RAMWatt (1ULL << 34) -#define BIC_PKG__ (1ULL << 35) -#define BIC_RAM__ (1ULL << 36) -#define BIC_Pkg_J (1ULL << 37) -#define BIC_Cor_J (1ULL << 38) -#define BIC_GFX_J (1ULL << 39) -#define BIC_RAM_J (1ULL << 40) -#define BIC_Mod_c6 (1ULL << 41) -#define BIC_Totl_c0 (1ULL << 42) -#define BIC_Any_c0 (1ULL << 43) -#define BIC_GFX_c0 (1ULL << 44) -#define BIC_CPUGFX (1ULL << 45) -#define BIC_Core (1ULL << 46) -#define BIC_CPU (1ULL << 47) -#define BIC_APIC (1ULL << 48) -#define BIC_X2APIC (1ULL << 49) -#define BIC_Die (1ULL << 50) -#define BIC_GFXACTMHz (1ULL << 51) -#define BIC_IPC (1ULL << 52) - -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) -#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) -#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz ) -#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) -#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) - -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) - -unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; - -#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) -#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) -#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) -#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) -#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) -#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) - #define MAX_DEFERRED 16 +char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; +int deferred_add_index; int deferred_skip_index; /* @@ -720,6 +731,8 @@ void help(void) " -l, --list list column headers only\n" " -n, --num_iterations num\n" " number of the measurement iterations\n" + " -N, --header_iterations num\n" + " print header every num iterations\n" " -o, --out file\n" " create or truncate \"file\" for all output\n" " -q, --quiet skip decoding system configuration header\n" @@ -741,7 +754,7 @@ void help(void) */ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) { - int i; + unsigned int i; unsigned long long retval = 0; while (name_list) { @@ -752,40 +765,51 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) if (comma) *comma = '\0'; - if (!strcmp(name_list, "all")) - return ~0; - if (!strcmp(name_list, "topology")) - return BIC_TOPOLOGY; - if (!strcmp(name_list, "power")) - return BIC_THERMAL_PWR; - if (!strcmp(name_list, "idle")) - return BIC_IDLE; - if (!strcmp(name_list, "frequency")) - return BIC_FREQUENCY; - if (!strcmp(name_list, "other")) - return BIC_OTHER; - if (!strcmp(name_list, "all")) - return 0; - for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { retval |= (1ULL << i); break; } + if (!strcmp(name_list, "all")) { + retval |= ~0; + break; + } else if (!strcmp(name_list, "topology")) { + retval |= BIC_TOPOLOGY; + break; + } else if (!strcmp(name_list, "power")) { + retval |= BIC_THERMAL_PWR; + break; + } else if (!strcmp(name_list, "idle")) { + retval |= BIC_IDLE; + break; + } else if (!strcmp(name_list, "frequency")) { + retval |= BIC_FREQUENCY; + break; + } else if (!strcmp(name_list, "other")) { + retval |= BIC_OTHER; + break; + } + } if (i == MAX_BIC) { if (mode == SHOW_LIST) { - fprintf(stderr, "Invalid counter name: %s\n", name_list); - exit(-1); - } - deferred_skip_names[deferred_skip_index++] = name_list; - if (debug) - fprintf(stderr, "deferred \"%s\"\n", name_list); - if (deferred_skip_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", - MAX_DEFERRED, name_list); - help(); - exit(1); + deferred_add_names[deferred_add_index++] = name_list; + if (deferred_add_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } + } else { + deferred_skip_names[deferred_skip_index++] = name_list; + if (debug) + fprintf(stderr, "deferred \"%s\"\n", name_list); + if (deferred_skip_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } } } @@ -872,6 +896,9 @@ void print_header(char *delim) if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); + if (DO_BIC(BIC_CORE_THROT_CNT)) + outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); + if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); @@ -1011,6 +1038,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); outp += sprintf(outp, "Joules: %0X\n", c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { @@ -1225,6 +1253,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); + /* Core throttle count */ + if (DO_BIC(BIC_CORE_THROT_CNT)) + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) @@ -1311,6 +1343,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); @@ -1386,14 +1419,14 @@ void flush_output_stderr(void) void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - static int printed; + static int count; - if (!printed || !summary_only) + if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) print_header("\t"); format_counters(&average.threads, &average.cores, &average.packages); - printed = 1; + count++; if (summary_only) return; @@ -1467,6 +1500,7 @@ void delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + old->core_throt_cnt = new->core_throt_cnt; old->mc6_us = new->mc6_us - old->mc6_us; DELTA_WRAP32(new->core_energy, old->core_energy); @@ -1626,6 +1660,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->mc6_us = 0; c->core_temp_c = 0; c->core_energy = 0; + c->core_throt_cnt = 0; p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; @@ -1710,6 +1745,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.mc6_us += c->mc6_us; average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); average.cores.core_energy += c->core_energy; @@ -1987,6 +2023,26 @@ void get_apic_id(struct thread_data *t) fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); } +int get_core_throt_cnt(int cpu, unsigned long long *cnt) +{ + char path[128 + PATH_BYTES]; + unsigned long long tmp; + FILE *fp; + int ret; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); + fp = fopen(path, "r"); + if (!fp) + return -1; + ret = fscanf(fp, "%lld", &tmp); + if (ret != 1) + return -1; + fclose(fp); + *cnt = tmp; + + return 0; +} + /* * get_counters(...) * migrate to cpu @@ -2129,6 +2185,9 @@ retry: c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); } + if (DO_BIC(BIC_CORE_THROT_CNT)) + get_core_throt_cnt(cpu, &c->core_throt_cnt); + if (do_rapl & RAPL_AMD_F17H) { if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) return -14; @@ -2428,6 +2487,9 @@ int has_turbo_ratio_group_limits(int family, int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: @@ -2435,8 +2497,9 @@ int has_turbo_ratio_group_limits(int family, int model) case INTEL_FAM6_ATOM_GOLDMONT_D: case INTEL_FAM6_ATOM_TREMONT_D: return 1; + default: + return 0; } - return 0; } static void dump_turbo_ratio_limits(int family, int model) @@ -3027,6 +3090,8 @@ void set_max_cpu_num(void) */ int count_cpus(int cpu) { + UNUSED(cpu); + topo.num_cpus++; return 0; } @@ -3361,6 +3426,9 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg int i, ret; int cpu = t->cpu_id; + UNUSED(c); + UNUSED(p); + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { unsigned long long msr_cur, msr_last; off_t offset; @@ -3387,6 +3455,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg static void msr_record_handler(union sigval v) { + UNUSED(v); + for_all_cpus(update_msr_sum, EVEN_COUNTERS); } @@ -3439,6 +3509,9 @@ release_msr: /* * set_my_sched_priority(pri) * return previous + * + * if non-root, do this: + * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat */ int set_my_sched_priority(int priority) { @@ -3457,7 +3530,7 @@ int set_my_sched_priority(int priority) errno = 0; retval = getpriority(PRIO_PROCESS, 0); if (retval != priority) - err(-1, "getpriority(%d) != setpriority(%d)", retval, priority); + err(retval, "getpriority(%d) != setpriority(%d)", retval, priority); return original_priority; } @@ -3466,7 +3539,7 @@ void turbostat_loop() { int retval; int restarted = 0; - int done_iters = 0; + unsigned int done_iters = 0; setup_signal_handler(); @@ -3678,6 +3751,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) break; case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ no_MSR_MISC_PWR_MGMT = 1; + /* FALLTHRU */ case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ pkg_cstate_limits = slv_pkg_cstate_limits; break; @@ -3721,6 +3795,9 @@ int has_slv_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_MID: @@ -3736,6 +3813,9 @@ int is_dnv(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_GOLDMONT_D: return 1; @@ -3749,6 +3829,9 @@ int is_bdx(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_BROADWELL_X: return 1; @@ -3762,6 +3845,9 @@ int is_skx(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_SKYLAKE_X: return 1; @@ -3775,6 +3861,9 @@ int is_icx(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ICELAKE_X: return 1; @@ -3787,6 +3876,9 @@ int is_ehl(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_TREMONT: return 1; @@ -3799,6 +3891,9 @@ int is_jvl(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_TREMONT_D: return 1; @@ -3811,6 +3906,9 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model) if (has_slv_msrs(family, model)) return 0; + if (family != 6) + return 0; + switch (model) { /* Nehalem compatible, but do not include turbo-ratio limit support */ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ @@ -4125,6 +4223,9 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) char *epb_string; int cpu, epb; + UNUSED(c); + UNUSED(p); + if (!has_epb) return 0; @@ -4171,6 +4272,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) unsigned long long msr; int cpu; + UNUSED(c); + UNUSED(p); + if (!has_hwp) return 0; @@ -4254,6 +4358,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data unsigned long long msr; int cpu; + UNUSED(c); + UNUSED(p); + cpu = t->cpu_id; /* per-package */ @@ -4359,6 +4466,8 @@ double get_tdp_intel(unsigned int model) double get_tdp_amd(unsigned int family) { + UNUSED(family); + /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ return 280.0; } @@ -4376,6 +4485,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) case INTEL_FAM6_BROADWELL_X: /* BDX */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); @@ -4559,6 +4669,8 @@ void rapl_probe_amd(unsigned int family, unsigned int model) unsigned int has_rapl = 0; double tdp; + UNUSED(model); + if (max_extended_level >= 0x80000007) { __cpuid(0x80000007, eax, ebx, ecx, edx); /* RAPL (Fam 17h+) */ @@ -4617,6 +4729,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) case INTEL_FAM6_HASWELL_L: /* HSW */ case INTEL_FAM6_HASWELL_G: /* HSW */ do_gfx_perf_limit_reasons = 1; + /* FALLTHRU */ case INTEL_FAM6_HASWELL_X: /* HSX */ do_core_perf_limit_reasons = 1; do_ring_perf_limit_reasons = 1; @@ -4643,6 +4756,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p unsigned int dts, dts2; int cpu; + UNUSED(c); + UNUSED(p); + if (!(do_dts || do_ptm)) return 0; @@ -4698,7 +4814,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p void print_power_limit_msr(int cpu, unsigned long long msr, char *label) { - fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, @@ -4714,6 +4830,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) const char *msr_name; int cpu; + UNUSED(c); + UNUSED(p); + if (!do_rapl) return 0; @@ -4762,12 +4881,19 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, msr, (msr >> 63) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); - fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? "EN" : "DIS"); + + if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) + return -9; + + fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); + fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", + cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); } if (do_rapl & RAPL_DRAM_POWER_INFO) { @@ -4830,6 +4956,9 @@ int has_snb_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_SANDYBRIDGE: case INTEL_FAM6_SANDYBRIDGE_X: @@ -4873,6 +5002,9 @@ int has_c8910_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_HASWELL_L: /* HSW */ case INTEL_FAM6_BROADWELL: /* BDW */ @@ -4899,6 +5031,9 @@ int has_skl_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ @@ -4911,6 +5046,10 @@ int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; + + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ @@ -4923,6 +5062,10 @@ int is_knl(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; + + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ return 1; @@ -4935,6 +5078,9 @@ int is_cnl(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_CANNONLAKE_L: /* CNL */ return 1; @@ -4989,6 +5135,9 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned int eax, ebx, ecx, edx; + UNUSED(c); + UNUSED(p); + if (!genuine_intel) return 0; @@ -5025,6 +5174,9 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk unsigned int tcc_default, tcc_offset; int cpu; + UNUSED(c); + UNUSED(p); + /* tj_max is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -5572,6 +5724,11 @@ void process_cpuid() else BIC_NOT_PRESENT(BIC_CPU_LPI); + if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) + BIC_PRESENT(BIC_CORE_THROT_CNT); + else + BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); + if (!access(sys_lpi_file_sysfs, R_OK)) { sys_lpi_file = sys_lpi_file_sysfs; BIC_PRESENT(BIC_SYS_LPI); @@ -5601,11 +5758,6 @@ int dir_filter(const struct dirent *dirp) return 0; } -int open_dev_cpu_msr(int dummy1) -{ - return 0; -} - void topology_probe() { int i; @@ -5896,6 +6048,9 @@ void turbostat_init() if (!quiet && do_irtl_snb) print_irtl(); + + if (DO_BIC(BIC_IPC)) + (void)get_instr_count_fd(base_cpu); } int fork_it(char **argv) @@ -5973,7 +6128,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n"); } int add_counter(unsigned int msr_num, char *path, char *name, @@ -6138,6 +6293,16 @@ next: } } +int is_deferred_add(char *name) +{ + int i; + + for (i = 0; i < deferred_add_index; ++i) + if (!strcmp(name, deferred_add_names[i])) + return 1; + return 0; +} + int is_deferred_skip(char *name) { int i; @@ -6156,9 +6321,6 @@ void probe_sysfs(void) int state; char *sp; - if (!DO_BIC(BIC_sysfs)) - return; - for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -6181,6 +6343,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/time", state); + if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + continue; + if (is_deferred_skip(name_buf)) continue; @@ -6206,6 +6371,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/usage", state); + if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + continue; + if (is_deferred_skip(name_buf)) continue; @@ -6313,6 +6481,7 @@ void cmdline(int argc, char **argv) { "interval", required_argument, 0, 'i' }, { "IPC", no_argument, 0, 'I' }, { "num_iterations", required_argument, 0, 'n' }, + { "header_iterations", required_argument, 0, 'N' }, { "help", no_argument, 0, 'h' }, { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help { "Joules", no_argument, 0, 'J' }, @@ -6394,6 +6563,14 @@ void cmdline(int argc, char **argv) exit(2); } break; + case 'N': + header_iterations = strtod(optarg, NULL); + + if (header_iterations <= 0) { + fprintf(outf, "iterations %d should be positive number\n", header_iterations); + exit(2); + } + break; case 's': /* * --show: show only those specified @@ -6432,6 +6609,8 @@ int main(int argc, char **argv) turbostat_init(); + msr_sum_record(); + /* dump counters and exit */ if (dump_only) return get_and_dump_counters(); @@ -6443,7 +6622,6 @@ int main(int argc, char **argv) return 0; } - msr_sum_record(); /* * if any params left, it must be a command to fork */ diff --git a/tools/testing/kunit/configs/all_tests_uml.config b/tools/testing/kunit/configs/all_tests_uml.config new file mode 100644 index 000000000000..bdee36bef4a3 --- /dev/null +++ b/tools/testing/kunit/configs/all_tests_uml.config @@ -0,0 +1,37 @@ +# This config enables as many tests as possible under UML. +# It is intended for use in continuous integration systems and similar for +# automated testing of as much as possible. +# The config is manually maintained, though it uses KUNIT_ALL_TESTS=y to enable +# any tests whose dependencies are already satisfied. Please feel free to add +# more options if they any new tests. + +CONFIG_KUNIT=y +CONFIG_KUNIT_EXAMPLE_TEST=y +CONFIG_KUNIT_ALL_TESTS=y + +CONFIG_IIO=y + +CONFIG_EXT4_FS=y + +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y + +CONFIG_VIRTIO_UML=y +CONFIG_UML_PCI_OVER_VIRTIO=y +CONFIG_PCI=y +CONFIG_USB4=y + +CONFIG_NET=y +CONFIG_MCTP=y + +CONFIG_INET=y +CONFIG_MPTCP=y + +CONFIG_DAMON=y +CONFIG_DAMON_VADDR=y +CONFIG_DAMON_PADDR=y +CONFIG_DEBUG_FS=y +CONFIG_DAMON_DBGFS=y + +CONFIG_SECURITY=y +CONFIG_SECURITY_APPARMOR=y diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 9274c6355809..13bd72e47da8 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -47,11 +47,11 @@ class KunitBuildRequest(KunitConfigRequest): @dataclass class KunitParseRequest: raw_output: Optional[str] - build_dir: str json: Optional[str] @dataclass class KunitExecRequest(KunitParseRequest): + build_dir: str timeout: int alltests: bool filter_glob: str @@ -63,8 +63,6 @@ class KunitRequest(KunitExecRequest, KunitBuildRequest): pass -KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] - def get_kernel_root_path() -> str: path = sys.argv[0] if not __file__ else __file__ parts = os.path.realpath(path).split('tools/testing/kunit') @@ -126,7 +124,7 @@ def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) lines.pop() # Filter out any extraneous non-test output that might have gotten mixed in. - return [l for l in lines if re.match('^[^\s.]+\.[^\s.]+$', l)] + return [l for l in lines if re.match(r'^[^\s.]+\.[^\s.]+$', l)] def _suites_from_test_list(tests: List[str]) -> List[str]: """Extracts all the suites from an ordered list of tests.""" @@ -155,6 +153,8 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) - test_glob = request.filter_glob.split('.', maxsplit=2)[1] filter_globs = [g + '.'+ test_glob for g in filter_globs] + metadata = kunit_json.Metadata(arch=linux.arch(), build_dir=request.build_dir, def_config='kunit_defconfig') + test_counts = kunit_parser.TestCounts() exec_time = 0.0 for i, filter_glob in enumerate(filter_globs): @@ -167,7 +167,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) - filter_glob=filter_glob, build_dir=request.build_dir) - _, test_result = parse_tests(request, run_result) + _, test_result = parse_tests(request, metadata, run_result) # run_kernel() doesn't block on the kernel exiting. # That only happens after we get the last line of output from `run_result`. # So exec_time here actually contains parsing + execution time, which is fine. @@ -188,10 +188,9 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) - def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED): return KunitStatus.SUCCESS - else: - return KunitStatus.TEST_FAILURE + return KunitStatus.TEST_FAILURE -def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]: +def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]: parse_start = time.time() test_result = kunit_parser.Test() @@ -206,8 +205,6 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> Tuple[ pass elif request.raw_output == 'kunit': output = kunit_parser.extract_tap_lines(output) - else: - print(f'Unknown --raw_output option "{request.raw_output}"', file=sys.stderr) for line in output: print(line.rstrip()) @@ -216,13 +213,16 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> Tuple[ parse_end = time.time() if request.json: - json_obj = kunit_json.get_json_result( + json_str = kunit_json.get_json_result( test=test_result, - def_config='kunit_defconfig', - build_dir=request.build_dir, - json_path=request.json) + metadata=metadata) if request.json == 'stdout': - print(json_obj) + print(json_str) + else: + with open(request.json, 'w') as f: + f.write(json_str) + kunit_parser.print_with_timestamp("Test results stored in %s" % + os.path.abspath(request.json)) if test_result.status != kunit_parser.TestStatus.SUCCESS: return KunitResult(KunitStatus.TEST_FAILURE, parse_end - parse_start), test_result @@ -281,10 +281,10 @@ def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' 'directory.', - type=str, default='.kunit', metavar='build_dir') + type=str, default='.kunit', metavar='DIR') parser.add_argument('--make_options', help='X=Y make option, can be repeated.', - action='append') + action='append', metavar='X=Y') parser.add_argument('--alltests', help='Run all KUnit tests through allyesconfig', action='store_true') @@ -292,11 +292,11 @@ def add_common_opts(parser) -> None: help='Path to Kconfig fragment that enables KUnit tests.' ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" ' 'will get automatically appended.', - metavar='kunitconfig') + metavar='PATH') parser.add_argument('--kconfig_add', help='Additional Kconfig options to append to the ' '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.', - action='append') + action='append', metavar='CONFIG_X=Y') parser.add_argument('--arch', help=('Specifies the architecture to run tests under. ' @@ -304,7 +304,7 @@ def add_common_opts(parser) -> None: 'string passed to the ARCH make param, ' 'e.g. i386, x86_64, arm, um, etc. Non-UML ' 'architectures run on QEMU.'), - type=str, default='um', metavar='arch') + type=str, default='um', metavar='ARCH') parser.add_argument('--cross_compile', help=('Sets make\'s CROSS_COMPILE variable; it should ' @@ -316,18 +316,18 @@ def add_common_opts(parser) -> None: 'if you have downloaded the microblaze toolchain ' 'from the 0-day website to a directory in your ' 'home directory called `toolchains`).'), - metavar='cross_compile') + metavar='PREFIX') parser.add_argument('--qemu_config', help=('Takes a path to a path to a file containing ' 'a QemuArchParams object.'), - type=str, metavar='qemu_config') + type=str, metavar='FILE') def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' 'jobs (commands) to run simultaneously."', - type=int, default=get_default_jobs(), metavar='jobs') + type=int, default=get_default_jobs(), metavar='N') def add_exec_opts(parser) -> None: parser.add_argument('--timeout', @@ -336,7 +336,7 @@ def add_exec_opts(parser) -> None: 'tests.', type=int, default=300, - metavar='timeout') + metavar='SECONDS') parser.add_argument('filter_glob', help='Filter which KUnit test suites/tests run at ' 'boot-time, e.g. list* or list*.*del_test', @@ -346,24 +346,24 @@ def add_exec_opts(parser) -> None: metavar='filter_glob') parser.add_argument('--kernel_args', help='Kernel command-line parameters. Maybe be repeated', - action='append') + action='append', metavar='') parser.add_argument('--run_isolated', help='If set, boot the kernel for each ' 'individual suite/test. This is can be useful for debugging ' 'a non-hermetic test, one that might pass/fail based on ' 'what ran before it.', type=str, - choices=['suite', 'test']), + choices=['suite', 'test']) def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='If set don\'t format output from kernel. ' 'If set to --raw_output=kunit, filters to just KUnit output.', - type=str, nargs='?', const='all', default=None) + type=str, nargs='?', const='all', default=None, choices=['all', 'kunit']) parser.add_argument('--json', nargs='?', help='Stores test results in a JSON, and either ' 'prints to stdout or saves to file if a ' 'filename is specified', - type=str, const='stdout', default=None) + type=str, const='stdout', default=None, metavar='FILE') def main(argv, linux=None): parser = argparse.ArgumentParser( @@ -496,16 +496,17 @@ def main(argv, linux=None): if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'parse': - if cli_args.file == None: + if cli_args.file is None: sys.stdin.reconfigure(errors='backslashreplace') # pytype: disable=attribute-error kunit_output = sys.stdin else: with open(cli_args.file, 'r', errors='backslashreplace') as f: kunit_output = f.read().splitlines() + # We know nothing about how the result was created! + metadata = kunit_json.Metadata() request = KunitParseRequest(raw_output=cli_args.raw_output, - build_dir='', json=cli_args.json) - result, _ = parse_tests(request, kunit_output) + result, _ = parse_tests(request, metadata, kunit_output) if result.status != KunitStatus.SUCCESS: sys.exit(1) else: diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 677354546156..75a8dc1683d4 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -6,29 +6,29 @@ # Author: Felix Guo <felixguoxiuping@gmail.com> # Author: Brendan Higgins <brendanhiggins@google.com> -import collections +from dataclasses import dataclass import re from typing import List, Set CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' -KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value']) - -class KconfigEntry(KconfigEntryBase): +@dataclass(frozen=True) +class KconfigEntry: + name: str + value: str def __str__(self) -> str: if self.value == 'n': - return r'# CONFIG_%s is not set' % (self.name) - else: - return r'CONFIG_%s=%s' % (self.name, self.value) + return f'# CONFIG_{self.name} is not set' + return f'CONFIG_{self.name}={self.value}' class KconfigParseError(Exception): """Error parsing Kconfig defconfig or .config.""" -class Kconfig(object): +class Kconfig: """Represents defconfig or .config specified using the Kconfig language.""" def __init__(self) -> None: @@ -48,7 +48,7 @@ class Kconfig(object): if a.value == 'n': continue return False - elif a.value != b: + if a.value != b: return False return True @@ -90,6 +90,5 @@ def parse_from_string(blob: str) -> Kconfig: if line[0] == '#': continue - else: - raise KconfigParseError('Failed to parse: ' + line) + raise KconfigParseError('Failed to parse: ' + line) return kconfig diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 6862671709bc..10ff65689dd8 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -6,60 +6,58 @@ # Copyright (C) 2020, Google LLC. # Author: Heidi Fahim <heidifahim@google.com> +from dataclasses import dataclass import json -import os - -import kunit_parser +from typing import Any, Dict from kunit_parser import Test, TestStatus -from typing import Any, Dict, Optional + +@dataclass +class Metadata: + """Stores metadata about this run to include in get_json_result().""" + arch: str = '' + def_config: str = '' + build_dir: str = '' JsonObj = Dict[str, Any] -def _get_group_json(test: Test, def_config: str, - build_dir: Optional[str]) -> JsonObj: +_status_map: Dict[TestStatus, str] = { + TestStatus.SUCCESS: "PASS", + TestStatus.SKIPPED: "SKIP", + TestStatus.TEST_CRASHED: "ERROR", +} + +def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj: sub_groups = [] # List[JsonObj] test_cases = [] # List[JsonObj] for subtest in test.subtests: - if len(subtest.subtests): - sub_group = _get_group_json(subtest, def_config, - build_dir) + if subtest.subtests: + sub_group = _get_group_json(subtest, common_fields) sub_groups.append(sub_group) - else: - test_case = {"name": subtest.name, "status": "FAIL"} - if subtest.status == TestStatus.SUCCESS: - test_case["status"] = "PASS" - elif subtest.status == TestStatus.SKIPPED: - test_case["status"] = "SKIP" - elif subtest.status == TestStatus.TEST_CRASHED: - test_case["status"] = "ERROR" - test_cases.append(test_case) + continue + status = _status_map.get(subtest.status, "FAIL") + test_cases.append({"name": subtest.name, "status": status}) test_group = { "name": test.name, - "arch": "UM", - "defconfig": def_config, - "build_environment": build_dir, "sub_groups": sub_groups, "test_cases": test_cases, + } + test_group.update(common_fields) + return test_group + +def get_json_result(test: Test, metadata: Metadata) -> str: + common_fields = { + "arch": metadata.arch, + "defconfig": metadata.def_config, + "build_environment": metadata.build_dir, "lab_name": None, "kernel": None, "job": None, "git_branch": "kselftest", } - return test_group -def get_json_result(test: Test, def_config: str, - build_dir: Optional[str], json_path: str) -> str: - test_group = _get_group_json(test, def_config, build_dir) + test_group = _get_group_json(test, common_fields) test_group["name"] = "KUnit Test Group" - json_obj = json.dumps(test_group, indent=4) - if json_path != 'stdout': - with open(json_path, 'w') as result_path: - result_path.write(json_obj) - root = __file__.split('tools/testing/kunit/')[0] - kunit_parser.print_with_timestamp( - "Test results stored in %s" % - os.path.join(root, result_path.name)) - return json_obj + return json.dumps(test_group, indent=4) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 3c4196cef3ed..3539efaf99ba 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -11,6 +11,7 @@ import importlib.util import logging import subprocess import os +import shlex import shutil import signal import threading @@ -29,11 +30,6 @@ OUTFILE_PATH = 'test.log' ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__)) QEMU_CONFIGS_DIR = os.path.join(ABS_TOOL_PATH, 'qemu_configs') -def get_file_path(build_dir, default): - if build_dir: - default = os.path.join(build_dir, default) - return default - class ConfigError(Exception): """Represents an error trying to configure the Linux kernel.""" @@ -42,7 +38,7 @@ class BuildError(Exception): """Represents an error trying to build the Linux kernel.""" -class LinuxSourceTreeOperations(object): +class LinuxSourceTreeOperations: """An abstraction over command line operations performed on a source tree.""" def __init__(self, linux_arch: str, cross_compile: Optional[str]): @@ -57,20 +53,18 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_arch_qemuconfig(self, kconfig: kunit_config.Kconfig) -> None: + def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None: pass - def make_allyesconfig(self, build_dir, make_options) -> None: + def make_allyesconfig(self, build_dir: str, make_options) -> None: raise ConfigError('Only the "um" arch is supported for alltests') - def make_olddefconfig(self, build_dir, make_options) -> None: - command = ['make', 'ARCH=' + self._linux_arch, 'olddefconfig'] + def make_olddefconfig(self, build_dir: str, make_options) -> None: + command = ['make', 'ARCH=' + self._linux_arch, 'O=' + build_dir, 'olddefconfig'] if self._cross_compile: command += ['CROSS_COMPILE=' + self._cross_compile] if make_options: command.extend(make_options) - if build_dir: - command += ['O=' + build_dir] print('Populating config with:\n$', ' '.join(command)) try: subprocess.check_output(command, stderr=subprocess.STDOUT) @@ -79,14 +73,12 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make(self, jobs, build_dir, make_options) -> None: - command = ['make', 'ARCH=' + self._linux_arch, '--jobs=' + str(jobs)] + def make(self, jobs, build_dir: str, make_options) -> None: + command = ['make', 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)] if make_options: command.extend(make_options) if self._cross_compile: command += ['CROSS_COMPILE=' + self._cross_compile] - if build_dir: - command += ['O=' + build_dir] print('Building with:\n$', ' '.join(command)) try: proc = subprocess.Popen(command, @@ -127,16 +119,17 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): '-nodefaults', '-m', '1024', '-kernel', kernel_path, - '-append', '\'' + ' '.join(params + [self._kernel_command_line]) + '\'', + '-append', ' '.join(params + [self._kernel_command_line]), '-no-reboot', '-nographic', - '-serial stdio'] + self._extra_qemu_params - print('Running tests with:\n$', ' '.join(qemu_command)) - return subprocess.Popen(' '.join(qemu_command), - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, shell=True, errors='backslashreplace') + '-serial', 'stdio'] + self._extra_qemu_params + # Note: shlex.join() does what we want, but requires python 3.8+. + print('Running tests with:\n$', ' '.join(shlex.quote(arg) for arg in qemu_command)) + return subprocess.Popen(qemu_command, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, errors='backslashreplace') class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): """An abstraction over command line operations performed on a source tree.""" @@ -144,14 +137,12 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): def __init__(self, cross_compile=None): super().__init__(linux_arch='um', cross_compile=cross_compile) - def make_allyesconfig(self, build_dir, make_options) -> None: + def make_allyesconfig(self, build_dir: str, make_options) -> None: kunit_parser.print_with_timestamp( 'Enabling all CONFIGs for UML...') - command = ['make', 'ARCH=um', 'allyesconfig'] + command = ['make', 'ARCH=um', 'O=' + build_dir, 'allyesconfig'] if make_options: command.extend(make_options) - if build_dir: - command += ['O=' + build_dir] process = subprocess.Popen( command, stdout=subprocess.DEVNULL, @@ -168,30 +159,30 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): def start(self, params: List[str], build_dir: str) -> subprocess.Popen: """Runs the Linux UML binary. Must be named 'linux'.""" - linux_bin = get_file_path(build_dir, 'linux') + linux_bin = os.path.join(build_dir, 'linux') return subprocess.Popen([linux_bin] + params, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, errors='backslashreplace') -def get_kconfig_path(build_dir) -> str: - return get_file_path(build_dir, KCONFIG_PATH) +def get_kconfig_path(build_dir: str) -> str: + return os.path.join(build_dir, KCONFIG_PATH) -def get_kunitconfig_path(build_dir) -> str: - return get_file_path(build_dir, KUNITCONFIG_PATH) +def get_kunitconfig_path(build_dir: str) -> str: + return os.path.join(build_dir, KUNITCONFIG_PATH) -def get_old_kunitconfig_path(build_dir) -> str: - return get_file_path(build_dir, OLD_KUNITCONFIG_PATH) +def get_old_kunitconfig_path(build_dir: str) -> str: + return os.path.join(build_dir, OLD_KUNITCONFIG_PATH) -def get_outfile_path(build_dir) -> str: - return get_file_path(build_dir, OUTFILE_PATH) +def get_outfile_path(build_dir: str) -> str: + return os.path.join(build_dir, OUTFILE_PATH) def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceTreeOperations: config_path = os.path.join(QEMU_CONFIGS_DIR, arch + '.py') if arch == 'um': return LinuxSourceTreeOperationsUml(cross_compile=cross_compile) - elif os.path.isfile(config_path): + if os.path.isfile(config_path): return get_source_tree_ops_from_qemu_config(config_path, cross_compile)[1] options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')] @@ -222,7 +213,7 @@ def get_source_tree_ops_from_qemu_config(config_path: str, return params.linux_arch, LinuxSourceTreeOperationsQemu( params, cross_compile=cross_compile) -class LinuxSourceTree(object): +class LinuxSourceTree: """Represents a Linux kernel source tree with KUnit tests.""" def __init__( @@ -260,6 +251,8 @@ class LinuxSourceTree(object): kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add)) self._kconfig.merge_in_entries(kconfig) + def arch(self) -> str: + return self._arch def clean(self) -> bool: try: @@ -269,7 +262,7 @@ class LinuxSourceTree(object): return False return True - def validate_config(self, build_dir) -> bool: + def validate_config(self, build_dir: str) -> bool: kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.parse_file(kconfig_path) if self._kconfig.is_subset_of(validated_kconfig): @@ -284,7 +277,7 @@ class LinuxSourceTree(object): logging.error(message) return False - def build_config(self, build_dir, make_options) -> bool: + def build_config(self, build_dir: str, make_options) -> bool: kconfig_path = get_kconfig_path(build_dir) if build_dir and not os.path.exists(build_dir): os.mkdir(build_dir) @@ -312,7 +305,7 @@ class LinuxSourceTree(object): old_kconfig = kunit_config.parse_file(old_path) return old_kconfig.entries() != self._kconfig.entries() - def build_reconfig(self, build_dir, make_options) -> bool: + def build_reconfig(self, build_dir: str, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) if not os.path.exists(kconfig_path): @@ -327,7 +320,7 @@ class LinuxSourceTree(object): os.remove(kconfig_path) return self.build_config(build_dir, make_options) - def build_kernel(self, alltests, jobs, build_dir, make_options) -> bool: + def build_kernel(self, alltests, jobs, build_dir: str, make_options) -> bool: try: if alltests: self._ops.make_allyesconfig(build_dir, make_options) @@ -375,6 +368,6 @@ class LinuxSourceTree(object): waiter.join() subprocess.call(['stty', 'sane']) - def signal_handler(self, sig, frame) -> None: + def signal_handler(self, unused_sig, unused_frame) -> None: logging.error('Build interruption occurred. Cleaning console.') subprocess.call(['stty', 'sane']) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 05ff334761dd..c5569b367c69 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -11,13 +11,13 @@ from __future__ import annotations import re +import sys import datetime from enum import Enum, auto -from functools import reduce from typing import Iterable, Iterator, List, Optional, Tuple -class Test(object): +class Test: """ A class to represent a test parsed from KTAP results. All KTAP results within a test log are stored in a main Test object as @@ -45,10 +45,8 @@ class Test(object): def __str__(self) -> str: """Returns string representation of a Test class object.""" - return ('Test(' + str(self.status) + ', ' + self.name + - ', ' + str(self.expected_count) + ', ' + - str(self.subtests) + ', ' + str(self.log) + ', ' + - str(self.counts) + ')') + return (f'Test({self.status}, {self.name}, {self.expected_count}, ' + f'{self.subtests}, {self.log}, {self.counts})') def __repr__(self) -> str: """Returns string representation of a Test class object.""" @@ -57,7 +55,7 @@ class Test(object): def add_error(self, error_message: str) -> None: """Records an error that occurred while parsing this test.""" self.counts.errors += 1 - print_error('Test ' + self.name + ': ' + error_message) + print_with_timestamp(red('[ERROR]') + f' Test: {self.name}: {error_message}') class TestStatus(Enum): """An enumeration class to represent the status of a test.""" @@ -91,13 +89,12 @@ class TestCounts: self.errors = 0 def __str__(self) -> str: - """Returns the string representation of a TestCounts object. - """ - return ('Passed: ' + str(self.passed) + - ', Failed: ' + str(self.failed) + - ', Crashed: ' + str(self.crashed) + - ', Skipped: ' + str(self.skipped) + - ', Errors: ' + str(self.errors)) + """Returns the string representation of a TestCounts object.""" + statuses = [('passed', self.passed), ('failed', self.failed), + ('crashed', self.crashed), ('skipped', self.skipped), + ('errors', self.errors)] + return f'Ran {self.total()} tests: ' + \ + ', '.join(f'{s}: {n}' for s, n in statuses if n > 0) def total(self) -> int: """Returns the total number of test cases within a test @@ -128,31 +125,19 @@ class TestCounts: """ if self.total() == 0: return TestStatus.NO_TESTS - elif self.crashed: - # If one of the subtests crash, the expected status - # of the Test is crashed. + if self.crashed: + # Crashes should take priority. return TestStatus.TEST_CRASHED - elif self.failed: - # Otherwise if one of the subtests fail, the - # expected status of the Test is failed. + if self.failed: return TestStatus.FAILURE - elif self.passed: - # Otherwise if one of the subtests pass, the - # expected status of the Test is passed. + if self.passed: + # No failures or crashes, looks good! return TestStatus.SUCCESS - else: - # Finally, if none of the subtests have failed, - # crashed, or passed, the expected status of the - # Test is skipped. - return TestStatus.SKIPPED + # We have only skipped tests. + return TestStatus.SKIPPED def add_status(self, status: TestStatus) -> None: - """ - Increments count of inputted status. - - Parameters: - status - status to be added to the TestCounts object - """ + """Increments the count for `status`.""" if status == TestStatus.SUCCESS: self.passed += 1 elif status == TestStatus.FAILURE: @@ -282,11 +267,9 @@ def check_version(version_num: int, accepted_versions: List[int], test - Test object for current test being parsed """ if version_num < min(accepted_versions): - test.add_error(version_type + - ' version lower than expected!') + test.add_error(f'{version_type} version lower than expected!') elif version_num > max(accepted_versions): - test.add_error( - version_type + ' version higher than expected!') + test.add_error(f'{version_type} version higer than expected!') def parse_ktap_header(lines: LineStream, test: Test) -> bool: """ @@ -396,7 +379,7 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool: if not match: return False name = match.group(4) - return (name == test.name) + return name == test.name def parse_test_result(lines: LineStream, test: Test, expected_num: int) -> bool: @@ -439,8 +422,7 @@ def parse_test_result(lines: LineStream, test: Test, # Check test num num = int(match.group(2)) if num != expected_num: - test.add_error('Expected test number ' + - str(expected_num) + ' but found ' + str(num)) + test.add_error(f'Expected test number {expected_num} but found {num}') # Set status of test object status = match.group(1) @@ -474,26 +456,6 @@ def parse_diagnostic(lines: LineStream) -> List[str]: log.append(lines.pop()) return log -DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^# .*?: kunit test case crashed!$') - -def parse_crash_in_log(test: Test) -> bool: - """ - Iterate through the lines of the log to parse for crash message. - If crash message found, set status to crashed and return True. - Otherwise return False. - - Parameters: - test - Test object for current test being parsed - - Return: - True if crash message found in log - """ - for line in test.log: - if DIAGNOSTIC_CRASH_MESSAGE.match(line): - test.status = TestStatus.TEST_CRASHED - return True - return False - # Printing helper methods: @@ -503,14 +465,20 @@ RESET = '\033[0;0m' def red(text: str) -> str: """Returns inputted string with red color code.""" + if not sys.stdout.isatty(): + return text return '\033[1;31m' + text + RESET def yellow(text: str) -> str: """Returns inputted string with yellow color code.""" + if not sys.stdout.isatty(): + return text return '\033[1;33m' + text + RESET def green(text: str) -> str: """Returns inputted string with green color code.""" + if not sys.stdout.isatty(): + return text return '\033[1;32m' + text + RESET ANSI_LEN = len(red('')) @@ -542,7 +510,7 @@ def format_test_divider(message: str, len_message: int) -> str: # calculate number of dashes for each side of the divider len_1 = int(difference / 2) len_2 = difference - len_1 - return ('=' * len_1) + ' ' + message + ' ' + ('=' * len_2) + return ('=' * len_1) + f' {message} ' + ('=' * len_2) def print_test_header(test: Test) -> None: """ @@ -558,20 +526,13 @@ def print_test_header(test: Test) -> None: message = test.name if test.expected_count: if test.expected_count == 1: - message += (' (' + str(test.expected_count) + - ' subtest)') + message += ' (1 subtest)' else: - message += (' (' + str(test.expected_count) + - ' subtests)') + message += f' ({test.expected_count} subtests)' print_with_timestamp(format_test_divider(message, len(message))) def print_log(log: Iterable[str]) -> None: - """ - Prints all strings in saved log for test in yellow. - - Parameters: - log - Iterable object with all strings saved in log for test - """ + """Prints all strings in saved log for test in yellow.""" for m in log: print_with_timestamp(yellow(m)) @@ -590,17 +551,16 @@ def format_test_result(test: Test) -> str: String containing formatted test result """ if test.status == TestStatus.SUCCESS: - return (green('[PASSED] ') + test.name) - elif test.status == TestStatus.SKIPPED: - return (yellow('[SKIPPED] ') + test.name) - elif test.status == TestStatus.NO_TESTS: - return (yellow('[NO TESTS RUN] ') + test.name) - elif test.status == TestStatus.TEST_CRASHED: + return green('[PASSED] ') + test.name + if test.status == TestStatus.SKIPPED: + return yellow('[SKIPPED] ') + test.name + if test.status == TestStatus.NO_TESTS: + return yellow('[NO TESTS RUN] ') + test.name + if test.status == TestStatus.TEST_CRASHED: print_log(test.log) - return (red('[CRASHED] ') + test.name) - else: - print_log(test.log) - return (red('[FAILED] ') + test.name) + return red('[CRASHED] ') + test.name + print_log(test.log) + return red('[FAILED] ') + test.name def print_test_result(test: Test) -> None: """ @@ -644,24 +604,11 @@ def print_summary_line(test: Test) -> None: """ if test.status == TestStatus.SUCCESS: color = green - elif test.status == TestStatus.SKIPPED or test.status == TestStatus.NO_TESTS: + elif test.status in (TestStatus.SKIPPED, TestStatus.NO_TESTS): color = yellow else: color = red - counts = test.counts - print_with_timestamp(color('Testing complete. ' + str(counts))) - -def print_error(error_message: str) -> None: - """ - Prints error message with error format. - - Example: - "[ERROR] Test example: missing test plan!" - - Parameters: - error_message - message describing error - """ - print_with_timestamp(red('[ERROR] ') + error_message) + print_with_timestamp(color(f'Testing complete. {test.counts}')) # Other methods: @@ -675,7 +622,6 @@ def bubble_up_test_results(test: Test) -> None: Parameters: test - Test object for current test being parsed """ - parse_crash_in_log(test) subtests = test.subtests counts = test.counts status = test.status @@ -789,8 +735,11 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: # Check for there being no tests if parent_test and len(subtests) == 0: - test.status = TestStatus.NO_TESTS - test.add_error('0 tests run!') + # Don't override a bad status if this test had one reported. + # Assumption: no subtests means CRASHED is from Test.__init__() + if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS): + test.status = TestStatus.NO_TESTS + test.add_error('0 tests run!') # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) @@ -805,7 +754,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: def parse_run_tests(kernel_output: Iterable[str]) -> Test: """ Using kernel output, extract KTAP lines, parse the lines for test - results and print condensed test results and summary line . + results and print condensed test results and summary line. Parameters: kernel_output - Iterable object contains lines of kernel output @@ -817,7 +766,8 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test: lines = extract_tap_lines(kernel_output) test = Test() if not lines: - test.add_error('invalid KTAP input!') + test.name = '<missing>' + test.add_error('could not find any KTAP output!') test.status = TestStatus.FAILURE_TO_PARSE_TESTS else: test = parse_test(lines, 0, []) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 352369dffbd9..25a2eb3bf114 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -226,19 +226,10 @@ class KUnitParserTest(unittest.TestCase): with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('invalid KTAP input!')) + print_mock.assert_any_call(StrContains('could not find any KTAP output!')) print_mock.stop() self.assertEqual(0, len(result.subtests)) - def test_crashed_test(self): - crashed_log = test_data_path('test_is_test_passed-crash.log') - with open(crashed_log) as file: - result = kunit_parser.parse_run_tests( - file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.TEST_CRASHED, - result.status) - def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') with open(skipped_log) as file: @@ -260,8 +251,8 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_hyphen(self): hyphen_log = test_data_path('test_strip_hyphen.log') - file = open(hyphen_log) - result = kunit_parser.parse_run_tests(file.readlines()) + with open(hyphen_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) # A skipped test does not fail the whole suite. self.assertEqual( @@ -356,7 +347,7 @@ class LineStreamTest(unittest.TestCase): called_times = 0 def generator(): nonlocal called_times - for i in range(1,5): + for _ in range(1,5): called_times += 1 yield called_times, str(called_times) @@ -468,9 +459,7 @@ class KUnitJsonTest(unittest.TestCase): test_result = kunit_parser.parse_run_tests(file) json_obj = kunit_json.get_json_result( test=test_result, - def_config='kunit_defconfig', - build_dir=None, - json_path='stdout') + metadata=kunit_json.Metadata()) return json.loads(json_obj) def test_failed_test_json(self): @@ -480,10 +469,10 @@ class KUnitJsonTest(unittest.TestCase): result["sub_groups"][1]["test_cases"][0]) def test_crashed_test_json(self): - result = self._json_for('test_is_test_passed-crash.log') + result = self._json_for('test_kernel_panic_interrupt.log') self.assertEqual( - {'name': 'example_simple_test', 'status': 'ERROR'}, - result["sub_groups"][1]["test_cases"][0]) + {'name': '', 'status': 'ERROR'}, + result["sub_groups"][2]["test_cases"][1]) def test_skipped_test_json(self): result = self._json_for('test_skip_tests.log') @@ -559,12 +548,13 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(e.exception.code, 1) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) - self.print_mock.assert_any_call(StrContains('invalid KTAP input!')) + self.print_mock.assert_any_call(StrContains('could not find any KTAP output!')) def test_exec_no_tests(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0']) with self.assertRaises(SystemExit) as e: - kunit.main(['run'], self.linux_source_mock) + kunit.main(['run'], self.linux_source_mock) + self.assertEqual(e.exception.code, 1) self.linux_source_mock.run_kernel.assert_called_once_with( args=None, build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains(' 0 tests run!')) @@ -595,6 +585,12 @@ class KUnitMainTest(unittest.TestCase): self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) + def test_run_raw_output_invalid(self): + self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) + with self.assertRaises(SystemExit) as e: + kunit.main(['run', '--raw_output=invalid'], self.linux_source_mock) + self.assertNotEqual(e.exception.code, 0) + def test_run_raw_output_does_not_take_positional_args(self): # --raw_output is a string flag, but we don't want it to consume # any positional arguments, only ones after an '=' @@ -692,7 +688,7 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want got = kunit._list_tests(self.linux_source_mock, - kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'suite')) + kunit.KunitExecRequest(None, None, '.kunit', 300, False, 'suite*', None, 'suite')) self.assertEqual(got, want) # Should respect the user's filter glob when listing tests. @@ -707,7 +703,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*.test*', None, 'suite')) + kunit.KunitExecRequest(None, None, '.kunit', 300, False, 'suite*.test*', None, 'suite')) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300), @@ -720,7 +716,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'test')) + kunit.KunitExecRequest(None, None, '.kunit', 300, False, 'suite*', None, 'test')) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300), diff --git a/tools/testing/kunit/qemu_config.py b/tools/testing/kunit/qemu_config.py index 1672f6184e95..0b6a80398ccc 100644 --- a/tools/testing/kunit/qemu_config.py +++ b/tools/testing/kunit/qemu_config.py @@ -5,12 +5,15 @@ # Copyright (C) 2021, Google LLC. # Author: Brendan Higgins <brendanhiggins@google.com> -from collections import namedtuple +from dataclasses import dataclass +from typing import List -QemuArchParams = namedtuple('QemuArchParams', ['linux_arch', - 'kconfig', - 'qemu_arch', - 'kernel_path', - 'kernel_command_line', - 'extra_qemu_params']) +@dataclass(frozen=True) +class QemuArchParams: + linux_arch: str + kconfig: str + qemu_arch: str + kernel_path: str + kernel_command_line: str + extra_qemu_params: List[str] diff --git a/tools/testing/kunit/qemu_configs/alpha.py b/tools/testing/kunit/qemu_configs/alpha.py index 5d0c0cff03bd..3ac846e03a6b 100644 --- a/tools/testing/kunit/qemu_configs/alpha.py +++ b/tools/testing/kunit/qemu_configs/alpha.py @@ -7,4 +7,4 @@ CONFIG_SERIAL_8250_CONSOLE=y''', qemu_arch='alpha', kernel_path='arch/alpha/boot/vmlinux', kernel_command_line='console=ttyS0', - extra_qemu_params=['']) + extra_qemu_params=[]) diff --git a/tools/testing/kunit/qemu_configs/arm.py b/tools/testing/kunit/qemu_configs/arm.py index b9c2a35e0296..db2160200566 100644 --- a/tools/testing/kunit/qemu_configs/arm.py +++ b/tools/testing/kunit/qemu_configs/arm.py @@ -10,4 +10,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''', qemu_arch='arm', kernel_path='arch/arm/boot/zImage', kernel_command_line='console=ttyAMA0', - extra_qemu_params=['-machine virt']) + extra_qemu_params=['-machine', 'virt']) diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py index 517c04459f47..67d04064f785 100644 --- a/tools/testing/kunit/qemu_configs/arm64.py +++ b/tools/testing/kunit/qemu_configs/arm64.py @@ -9,4 +9,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''', qemu_arch='aarch64', kernel_path='arch/arm64/boot/Image.gz', kernel_command_line='console=ttyAMA0', - extra_qemu_params=['-machine virt', '-cpu cortex-a57']) + extra_qemu_params=['-machine', 'virt', '-cpu', 'cortex-a57']) diff --git a/tools/testing/kunit/qemu_configs/i386.py b/tools/testing/kunit/qemu_configs/i386.py index aed3ffd3937d..4463ebefd567 100644 --- a/tools/testing/kunit/qemu_configs/i386.py +++ b/tools/testing/kunit/qemu_configs/i386.py @@ -4,7 +4,7 @@ QEMU_ARCH = QemuArchParams(linux_arch='i386', kconfig=''' CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y''', - qemu_arch='x86_64', + qemu_arch='i386', kernel_path='arch/x86/boot/bzImage', kernel_command_line='console=ttyS0', - extra_qemu_params=['']) + extra_qemu_params=[]) diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py index 35e9de24f0db..7ec38d4131f7 100644 --- a/tools/testing/kunit/qemu_configs/powerpc.py +++ b/tools/testing/kunit/qemu_configs/powerpc.py @@ -9,4 +9,4 @@ CONFIG_HVC_CONSOLE=y''', qemu_arch='ppc64', kernel_path='vmlinux', kernel_command_line='console=ttyS0', - extra_qemu_params=['-M pseries', '-cpu power8']) + extra_qemu_params=['-M', 'pseries', '-cpu', 'power8']) diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py index 9e528087cd7c..6207be146d26 100644 --- a/tools/testing/kunit/qemu_configs/riscv.py +++ b/tools/testing/kunit/qemu_configs/riscv.py @@ -21,11 +21,12 @@ CONFIG_SOC_VIRT=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_RISCV_SBI_V01=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y''', qemu_arch='riscv64', kernel_path='arch/riscv/boot/Image', kernel_command_line='console=ttyS0', extra_qemu_params=[ - '-machine virt', - '-cpu rv64', - '-bios opensbi-riscv64-generic-fw_dynamic.bin']) + '-machine', 'virt', + '-cpu', 'rv64', + '-bios', 'opensbi-riscv64-generic-fw_dynamic.bin']) diff --git a/tools/testing/kunit/qemu_configs/s390.py b/tools/testing/kunit/qemu_configs/s390.py index e310bd521113..98fa4fb60c0a 100644 --- a/tools/testing/kunit/qemu_configs/s390.py +++ b/tools/testing/kunit/qemu_configs/s390.py @@ -10,5 +10,5 @@ CONFIG_MODULES=y''', kernel_path='arch/s390/boot/bzImage', kernel_command_line='console=ttyS0', extra_qemu_params=[ - '-machine s390-ccw-virtio', - '-cpu qemu',]) + '-machine', 's390-ccw-virtio', + '-cpu', 'qemu',]) diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index 27f474e7ad6e..e975c4331a7c 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -7,4 +7,4 @@ CONFIG_SERIAL_8250_CONSOLE=y''', qemu_arch='sparc', kernel_path='arch/sparc/boot/zImage', kernel_command_line='console=ttyS0 mem=256M', - extra_qemu_params=['-m 256']) + extra_qemu_params=['-m', '256']) diff --git a/tools/testing/kunit/qemu_configs/x86_64.py b/tools/testing/kunit/qemu_configs/x86_64.py index 77ab1aeee8a3..dc7949076863 100644 --- a/tools/testing/kunit/qemu_configs/x86_64.py +++ b/tools/testing/kunit/qemu_configs/x86_64.py @@ -7,4 +7,4 @@ CONFIG_SERIAL_8250_CONSOLE=y''', qemu_arch='x86_64', kernel_path='arch/x86/boot/bzImage', kernel_command_line='console=ttyS0', - extra_qemu_params=['']) + extra_qemu_params=[]) diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py index 13d854afca9d..066e6f938f6d 100755 --- a/tools/testing/kunit/run_checks.py +++ b/tools/testing/kunit/run_checks.py @@ -14,7 +14,7 @@ import shutil import subprocess import sys import textwrap -from typing import Dict, List, Sequence, Tuple +from typing import Dict, List, Sequence ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__)) TIMEOUT = datetime.timedelta(minutes=5).total_seconds() diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log deleted file mode 100644 index 4d97f6708c4a..000000000000 --- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log +++ /dev/null @@ -1,70 +0,0 @@ -printk: console [tty0] enabled -printk: console [mc-1] enabled -TAP version 14 -1..2 - # Subtest: sysctl_test - 1..8 - # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed - ok 1 - sysctl_test_dointvec_null_tbl_data - # sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed - ok 2 - sysctl_test_dointvec_table_maxlen_unset - # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed - ok 3 - sysctl_test_dointvec_table_len_is_zero - # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed - ok 4 - sysctl_test_dointvec_table_read_but_position_set - # sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed - ok 5 - sysctl_test_dointvec_happy_single_positive - # sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed - ok 6 - sysctl_test_dointvec_happy_single_negative - # sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed - ok 7 - sysctl_test_dointvec_single_less_int_min - # sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed - ok 8 - sysctl_test_dointvec_single_greater_int_max -kunit sysctl_test: all tests passed -ok 1 - sysctl_test - # Subtest: example - 1..2 -init_suite - # example_simple_test: initializing -Stack: - 6016f7db 6f81bd30 6f81bdd0 60021450 - 6024b0e8 60021440 60018bbe 16f81bdc0 - 00000001 6f81bd30 6f81bd20 6f81bdd0 -Call Trace: - [<6016f7db>] ? kunit_try_run_case+0xab/0xf0 - [<60021450>] ? set_signals+0x0/0x60 - [<60021440>] ? get_signals+0x0/0x10 - [<60018bbe>] ? kunit_um_run_try_catch+0x5e/0xc0 - [<60021450>] ? set_signals+0x0/0x60 - [<60021440>] ? get_signals+0x0/0x10 - [<60018bb3>] ? kunit_um_run_try_catch+0x53/0xc0 - [<6016f321>] ? kunit_run_case_catch_errors+0x121/0x1a0 - [<60018b60>] ? kunit_um_run_try_catch+0x0/0xc0 - [<600189e0>] ? kunit_um_throw+0x0/0x180 - [<6016f730>] ? kunit_try_run_case+0x0/0xf0 - [<6016f600>] ? kunit_catch_run_case+0x0/0x130 - [<6016edd0>] ? kunit_vprintk+0x0/0x30 - [<6016ece0>] ? kunit_fail+0x0/0x40 - [<6016eca0>] ? kunit_abort+0x0/0x40 - [<6016ed20>] ? kunit_printk_emit+0x0/0xb0 - [<6016f200>] ? kunit_run_case_catch_errors+0x0/0x1a0 - [<6016f46e>] ? kunit_run_tests+0xce/0x260 - [<6005b390>] ? unregister_console+0x0/0x190 - [<60175b70>] ? suite_kunit_initexample_test_suite+0x0/0x20 - [<60001cbb>] ? do_one_initcall+0x0/0x197 - [<60001d47>] ? do_one_initcall+0x8c/0x197 - [<6005cd20>] ? irq_to_desc+0x0/0x30 - [<60002005>] ? kernel_init_freeable+0x1b3/0x272 - [<6005c5ec>] ? printk+0x0/0x9b - [<601c0086>] ? kernel_init+0x26/0x160 - [<60014442>] ? new_thread_handler+0x82/0xc0 - - # example_simple_test: kunit test case crashed! - # example_simple_test: example_simple_test failed - not ok 1 - example_simple_test - # example_mock_test: initializing - # example_mock_test: example_mock_test passed - ok 2 - example_mock_test -kunit example: one or more tests failed -not ok 2 - example -List of all partitions: diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log index dd873c981108..4f81876ee6f1 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log @@ -3,5 +3,5 @@ TAP version 14 # Subtest: suite 1..1 # Subtest: case - ok 1 - case # SKIP + ok 1 - case ok 1 - suite diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2319ec87f53d..0aedcd76cf0f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -9,6 +9,7 @@ TARGETS += clone3 TARGETS += core TARGETS += cpufreq TARGETS += cpu-hotplug +TARGETS += damon TARGETS += drivers/dma-buf TARGETS += efivarfs TARGETS += exec @@ -52,6 +53,7 @@ TARGETS += proc TARGETS += pstore TARGETS += ptrace TARGETS += openat2 +TARGETS += resctrl TARGETS += rlimits TARGETS += rseq TARGETS += rtc diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 1e8d9a8f59df..9460cbe81bcc 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -17,16 +17,7 @@ top_srcdir = $(realpath ../../../../) # Additional include paths needed by kselftest.h and local headers CFLAGS += -I$(top_srcdir)/tools/testing/selftests/ -# Guessing where the Kernel headers could have been installed -# depending on ENV config -ifeq ($(KBUILD_OUTPUT),) -khdr_dir = $(top_srcdir)/usr/include -else -# the KSFT preferred location when KBUILD_OUTPUT is set -khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include -endif - -CFLAGS += -I$(khdr_dir) +CFLAGS += $(KHDR_INCLUDES) export CFLAGS export top_srcdir diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore index b79cf5814c23..b9e54417250d 100644 --- a/tools/testing/selftests/arm64/abi/.gitignore +++ b/tools/testing/selftests/arm64/abi/.gitignore @@ -1 +1,2 @@ syscall-abi +tpidr2 diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index 96eba974ac8d..c8d7f2495eb2 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -1,8 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2021 ARM Limited -TEST_GEN_PROGS := syscall-abi +TEST_GEN_PROGS := syscall-abi tpidr2 include ../../lib.mk $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S + +# Build with nolibc since TPIDR2 is intended to be actively managed by +# libc and we're trying to test the functionality that it depends on here. +$(OUTPUT)/tpidr2: tpidr2.c + $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + -static -include ../../../../include/nolibc/nolibc.h \ + -ffreestanding -Wall $^ -o $@ -lgcc diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index 983467cfcee0..b523c21c2278 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -9,15 +9,42 @@ // invoked is configured in x8 of the input GPR data. // // x0: SVE VL, 0 for FP only +// x1: SME VL // // GPRs: gpr_in, gpr_out // FPRs: fpr_in, fpr_out // Zn: z_in, z_out // Pn: p_in, p_out // FFR: ffr_in, ffr_out +// ZA: za_in, za_out +// SVCR: svcr_in, svcr_out + +#include "syscall-abi.h" .arch_extension sve +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _ldr_za nw, nxbase, offset=0 + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _str_za nw, nxbase, offset=0 + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + .globl do_syscall do_syscall: // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1 @@ -30,6 +57,24 @@ do_syscall: stp x25, x26, [sp, #80] stp x27, x28, [sp, #96] + // Set SVCR if we're doing SME + cbz x1, 1f + adrp x2, svcr_in + ldr x2, [x2, :lo12:svcr_in] + msr S3_3_C4_C2_2, x2 +1: + + // Load ZA if it's enabled - uses x12 as scratch due to SME LDR + tbz x2, #SVCR_ZA_SHIFT, 1f + mov w12, #0 + ldr x2, =za_in +2: _ldr_za 12, 2 + add x2, x2, x1 + add x12, x12, #1 + cmp x1, x12 + bne 2b +1: + // Load GPRs x8-x28, and save our SP/FP for later comparison ldr x2, =gpr_in add x2, x2, #64 @@ -68,7 +113,7 @@ do_syscall: ldp q30, q31, [x2, #16 * 30] 1: - // Load the SVE registers if we're doing SVE + // Load the SVE registers if we're doing SVE/SME cbz x0, 1f ldr x2, =z_in @@ -105,9 +150,14 @@ do_syscall: ldr z30, [x2, #30, MUL VL] ldr z31, [x2, #31, MUL VL] + // Only set a non-zero FFR, test patterns must be zero since the + // syscall should clear it - this lets us handle FA64. ldr x2, =ffr_in ldr p0, [x2, #0] + ldr x2, [x2, #0] + cbz x2, 2f wrffr p0.b +2: ldr x2, =p_in ldr p0, [x2, #0, MUL VL] @@ -169,6 +219,24 @@ do_syscall: stp q28, q29, [x2, #16 * 28] stp q30, q31, [x2, #16 * 30] + // Save SVCR if we're doing SME + cbz x1, 1f + mrs x2, S3_3_C4_C2_2 + adrp x3, svcr_out + str x2, [x3, :lo12:svcr_out] +1: + + // Save ZA if it's enabled - uses x12 as scratch due to SME STR + tbz x2, #SVCR_ZA_SHIFT, 1f + mov w12, #0 + ldr x2, =za_out +2: _str_za 12, 2 + add x2, x2, x1 + add x12, x12, #1 + cmp x1, x12 + bne 2b +1: + // Save the SVE state if we have some cbz x0, 1f @@ -224,6 +292,10 @@ do_syscall: str p14, [x2, #14, MUL VL] str p15, [x2, #15, MUL VL] + // Only save FFR if we wrote a value for SME + ldr x2, =ffr_in + ldr x2, [x2, #0] + cbz x2, 1f ldr x2, =ffr_out rdffr p0.b str p0, [x2, #0] @@ -237,4 +309,9 @@ do_syscall: ldp x27, x28, [sp, #96] ldp x29, x30, [sp], #112 + // Clear SVCR if we were doing SME so future tests don't have ZA + cbz x1, 1f + msr S3_3_C4_C2_2, xzr +1: + ret diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index 1e13b7523918..b632bfe9e022 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -18,9 +18,13 @@ #include "../../kselftest.h" +#include "syscall-abi.h" + #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) -extern void do_syscall(int sve_vl); +static int default_sme_vl; + +extern void do_syscall(int sve_vl, int sme_vl); static void fill_random(void *buf, size_t size) { @@ -48,14 +52,15 @@ static struct syscall_cfg { uint64_t gpr_in[NUM_GPR]; uint64_t gpr_out[NUM_GPR]; -static void setup_gpr(struct syscall_cfg *cfg, int sve_vl) +static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(gpr_in, sizeof(gpr_in)); gpr_in[8] = cfg->syscall_nr; memset(gpr_out, 0, sizeof(gpr_out)); } -static int check_gpr(struct syscall_cfg *cfg, int sve_vl) +static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) { int errors = 0; int i; @@ -79,13 +84,15 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl) uint64_t fpr_in[NUM_FPR * 2]; uint64_t fpr_out[NUM_FPR * 2]; -static void setup_fpr(struct syscall_cfg *cfg, int sve_vl) +static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(fpr_in, sizeof(fpr_in)); memset(fpr_out, 0, sizeof(fpr_out)); } -static int check_fpr(struct syscall_cfg *cfg, int sve_vl) +static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { int errors = 0; int i; @@ -109,13 +116,15 @@ static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; -static void setup_z(struct syscall_cfg *cfg, int sve_vl) +static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(z_in, sizeof(z_in)); fill_random(z_out, sizeof(z_out)); } -static int check_z(struct syscall_cfg *cfg, int sve_vl) +static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { size_t reg_size = sve_vl; int errors = 0; @@ -126,13 +135,17 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl) /* * After a syscall the low 128 bits of the Z registers should - * be preserved and the rest be zeroed or preserved. + * be preserved and the rest be zeroed or preserved, except if + * we were in streaming mode in which case the low 128 bits may + * also be cleared by the transition out of streaming mode. */ for (i = 0; i < SVE_NUM_ZREGS; i++) { void *in = &z_in[reg_size * i]; void *out = &z_out[reg_size * i]; - if (memcmp(in, out, SVE_VQ_BYTES) != 0) { + if ((memcmp(in, out, SVE_VQ_BYTES) != 0) && + !((svcr & SVCR_SM_MASK) && + memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) { ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n", cfg->name, sve_vl, i); errors++; @@ -145,13 +158,15 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl) uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; -static void setup_p(struct syscall_cfg *cfg, int sve_vl) +static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(p_in, sizeof(p_in)); fill_random(p_out, sizeof(p_out)); } -static int check_p(struct syscall_cfg *cfg, int sve_vl) +static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ @@ -175,9 +190,20 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl) uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)]; uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)]; -static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) +static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { /* + * If we are in streaming mode and do not have FA64 then FFR + * is unavailable. + */ + if ((svcr & SVCR_SM_MASK) && + !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) { + memset(&ffr_in, 0, sizeof(ffr_in)); + return; + } + + /* * It is only valid to set a contiguous set of bits starting * at 0. For now since we're expecting this to be cleared by * a syscall just set all bits. @@ -186,7 +212,8 @@ static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) fill_random(ffr_out, sizeof(ffr_out)); } -static int check_ffr(struct syscall_cfg *cfg, int sve_vl) +static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ int errors = 0; @@ -195,6 +222,10 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl) if (!sve_vl) return 0; + if ((svcr & SVCR_SM_MASK) && + !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) + return 0; + /* After a syscall the P registers should be preserved or zeroed */ for (i = 0; i < reg_size; i++) if (ffr_out[i] && (ffr_in[i] != ffr_out[i])) @@ -206,8 +237,65 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl) return errors; } -typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl); -typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl); +uint64_t svcr_in, svcr_out; + +static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + svcr_in = svcr; +} + +static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + int errors = 0; + + if (svcr_out & SVCR_SM_MASK) { + ksft_print_msg("%s Still in SM, SVCR %llx\n", + cfg->name, svcr_out); + errors++; + } + + if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) { + ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n", + cfg->name, svcr_in, svcr_out); + errors++; + } + + return errors; +} + +uint8_t za_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t za_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; + +static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + fill_random(za_in, sizeof(za_in)); + memset(za_out, 0, sizeof(za_out)); +} + +static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + size_t reg_size = sme_vl * sme_vl; + int errors = 0; + + if (!(svcr & SVCR_ZA_MASK)) + return 0; + + if (memcmp(za_in, za_out, reg_size) != 0) { + ksft_print_msg("SME VL %d ZA does not match\n", sme_vl); + errors++; + } + + return errors; +} + +typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr); +typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr); /* * Each set of registers has a setup function which is called before @@ -225,20 +313,23 @@ static struct { { setup_z, check_z }, { setup_p, check_p }, { setup_ffr, check_ffr }, + { setup_svcr, check_svcr }, + { setup_za, check_za }, }; -static bool do_test(struct syscall_cfg *cfg, int sve_vl) +static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { int errors = 0; int i; for (i = 0; i < ARRAY_SIZE(regset); i++) - regset[i].setup(cfg, sve_vl); + regset[i].setup(cfg, sve_vl, sme_vl, svcr); - do_syscall(sve_vl); + do_syscall(sve_vl, sme_vl); for (i = 0; i < ARRAY_SIZE(regset); i++) - errors += regset[i].check(cfg, sve_vl); + errors += regset[i].check(cfg, sve_vl, sme_vl, svcr); return errors == 0; } @@ -246,9 +337,10 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl) static void test_one_syscall(struct syscall_cfg *cfg) { int sve_vq, sve_vl; + int sme_vq, sme_vl; /* FPSIMD only case */ - ksft_test_result(do_test(cfg, 0), + ksft_test_result(do_test(cfg, 0, default_sme_vl, 0), "%s FPSIMD\n", cfg->name); if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) @@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg) if (sve_vq != sve_vq_from_vl(sve_vl)) sve_vq = sve_vq_from_vl(sve_vl); - ksft_test_result(do_test(cfg, sve_vl), + ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0), "%s SVE VL %d\n", cfg->name, sve_vl); + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) + continue; + + for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) { + sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16); + if (sme_vl == -1) + ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + sme_vl &= PR_SME_VL_LEN_MASK; + + if (sme_vq != sve_vq_from_vl(sme_vl)) + sme_vq = sve_vq_from_vl(sme_vl); + + ksft_test_result(do_test(cfg, sve_vl, sme_vl, + SVCR_ZA_MASK | SVCR_SM_MASK), + "%s SVE VL %d/SME VL %d SM+ZA\n", + cfg->name, sve_vl, sme_vl); + ksft_test_result(do_test(cfg, sve_vl, sme_vl, + SVCR_SM_MASK), + "%s SVE VL %d/SME VL %d SM\n", + cfg->name, sve_vl, sme_vl); + ksft_test_result(do_test(cfg, sve_vl, sme_vl, + SVCR_ZA_MASK), + "%s SVE VL %d/SME VL %d ZA\n", + cfg->name, sve_vl, sme_vl); + } } } @@ -299,14 +419,54 @@ int sve_count_vls(void) return vl_count; } +int sme_count_vls(void) +{ + unsigned int vq; + int vl_count = 0; + int vl; + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) + return 0; + + /* Ensure we configure a SME VL, used to flag if SVCR is set */ + default_sme_vl = 16; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SME_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SME_VL_LEN_MASK; + + if (vq != sve_vq_from_vl(vl)) + vq = sve_vq_from_vl(vl); + + vl_count++; + } + + return vl_count; +} + int main(void) { int i; + int tests = 1; /* FPSIMD */ srandom(getpid()); ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1)); + tests += sve_count_vls(); + tests += (sve_count_vls() * sme_count_vls()) * 3; + ksft_set_plan(ARRAY_SIZE(syscalls) * tests); + + if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) + ksft_print_msg("SME with FA64\n"); + else if (getauxval(AT_HWCAP2) & HWCAP2_SME) + ksft_print_msg("SME without FA64\n"); for (i = 0; i < ARRAY_SIZE(syscalls); i++) test_one_syscall(&syscalls[i]); diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h new file mode 100644 index 000000000000..bda5a87ad381 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/syscall-abi.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 ARM Limited. + */ + +#ifndef SYSCALL_ABI_H +#define SYSCALL_ABI_H + +#define SVCR_ZA_MASK 2 +#define SVCR_SM_MASK 1 + +#define SVCR_ZA_SHIFT 1 +#define SVCR_SM_SHIFT 0 + +#endif diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c new file mode 100644 index 000000000000..351a098b503a --- /dev/null +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/sched.h> +#include <linux/wait.h> + +#define SYS_TPIDR2 "S3_3_C13_C0_5" + +#define EXPECTED_TESTS 5 + +static void putstr(const char *str) +{ + write(1, str, strlen(str)); +} + +static void putnum(unsigned int num) +{ + char c; + + if (num / 10) + putnum(num / 10); + + c = '0' + (num % 10); + write(1, &c, 1); +} + +static int tests_run; +static int tests_passed; +static int tests_failed; +static int tests_skipped; + +static void set_tpidr2(uint64_t val) +{ + asm volatile ( + "msr " SYS_TPIDR2 ", %0\n" + : + : "r"(val) + : "cc"); +} + +static uint64_t get_tpidr2(void) +{ + uint64_t val; + + asm volatile ( + "mrs %0, " SYS_TPIDR2 "\n" + : "=r"(val) + : + : "cc"); + + return val; +} + +static void print_summary(void) +{ + if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) + putstr("# UNEXPECTED TEST COUNT: "); + + putstr("# Totals: pass:"); + putnum(tests_passed); + putstr(" fail:"); + putnum(tests_failed); + putstr(" xfail:0 xpass:0 skip:"); + putnum(tests_skipped); + putstr(" error:0\n"); +} + +/* Processes should start with TPIDR2 == 0 */ +static int default_value(void) +{ + return get_tpidr2() == 0; +} + +/* If we set TPIDR2 we should read that value */ +static int write_read(void) +{ + set_tpidr2(getpid()); + + return getpid() == get_tpidr2(); +} + +/* If we set a value we should read the same value after scheduling out */ +static int write_sleep_read(void) +{ + set_tpidr2(getpid()); + + msleep(100); + + return getpid() == get_tpidr2(); +} + +/* + * If we fork the value in the parent should be unchanged and the + * child should start with the same value and be able to set its own + * value. + */ +static int write_fork_read(void) +{ + pid_t newpid, waiting, oldpid; + int status; + + set_tpidr2(getpid()); + + oldpid = getpid(); + newpid = fork(); + if (newpid == 0) { + /* In child */ + if (get_tpidr2() != oldpid) { + putstr("# TPIDR2 changed in child: "); + putnum(get_tpidr2()); + putstr("\n"); + exit(0); + } + + set_tpidr2(getpid()); + if (get_tpidr2() == getpid()) { + exit(1); + } else { + putstr("# Failed to set TPIDR2 in child\n"); + exit(0); + } + } + if (newpid < 0) { + putstr("# fork() failed: -"); + putnum(-newpid); + putstr("\n"); + return 0; + } + + for (;;) { + waiting = waitpid(newpid, &status, 0); + + if (waiting < 0) { + if (errno == EINTR) + continue; + putstr("# waitpid() failed: "); + putnum(errno); + putstr("\n"); + return 0; + } + if (waiting != newpid) { + putstr("# waitpid() returned wrong PID\n"); + return 0; + } + + if (!WIFEXITED(status)) { + putstr("# child did not exit\n"); + return 0; + } + + if (getpid() != get_tpidr2()) { + putstr("# TPIDR2 corrupted in parent\n"); + return 0; + } + + return WEXITSTATUS(status); + } +} + +/* + * sys_clone() has a lot of per architecture variation so just define + * it here rather than adding it to nolibc, plus the raw API is a + * little more convenient for this test. + */ +static int sys_clone(unsigned long clone_flags, unsigned long newsp, + int *parent_tidptr, unsigned long tls, + int *child_tidptr) +{ + return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls, + child_tidptr); +} + +/* + * If we clone with CLONE_SETTLS then the value in the parent should + * be unchanged and the child should start with zero and be able to + * set its own value. + */ +static int write_clone_read(void) +{ + int parent_tid, child_tid; + pid_t parent, waiting; + int ret, status; + + parent = getpid(); + set_tpidr2(parent); + + ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid); + if (ret == -1) { + putstr("# clone() failed\n"); + putnum(errno); + putstr("\n"); + return 0; + } + + if (ret == 0) { + /* In child */ + if (get_tpidr2() != 0) { + putstr("# TPIDR2 non-zero in child: "); + putnum(get_tpidr2()); + putstr("\n"); + exit(0); + } + + if (gettid() == 0) + putstr("# Child TID==0\n"); + set_tpidr2(gettid()); + if (get_tpidr2() == gettid()) { + exit(1); + } else { + putstr("# Failed to set TPIDR2 in child\n"); + exit(0); + } + } + + for (;;) { + waiting = wait4(ret, &status, __WCLONE, NULL); + + if (waiting < 0) { + if (errno == EINTR) + continue; + putstr("# wait4() failed: "); + putnum(errno); + putstr("\n"); + return 0; + } + if (waiting != ret) { + putstr("# wait4() returned wrong PID "); + putnum(waiting); + putstr("\n"); + return 0; + } + + if (!WIFEXITED(status)) { + putstr("# child did not exit\n"); + return 0; + } + + if (parent != get_tpidr2()) { + putstr("# TPIDR2 corrupted in parent\n"); + return 0; + } + + return WEXITSTATUS(status); + } +} + +#define run_test(name) \ + if (name()) { \ + tests_passed++; \ + } else { \ + tests_failed++; \ + putstr("not "); \ + } \ + putstr("ok "); \ + putnum(++tests_run); \ + putstr(" " #name "\n"); + +int main(int argc, char **argv) +{ + int ret, i; + + putstr("TAP version 13\n"); + putstr("1.."); + putnum(EXPECTED_TESTS); + putstr("\n"); + + putstr("# PID: "); + putnum(getpid()); + putstr("\n"); + + /* + * This test is run with nolibc which doesn't support hwcap and + * it's probably disproportionate to implement so instead check + * for the default vector length configuration in /proc. + */ + ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); + if (ret >= 0) { + run_test(default_value); + run_test(write_read); + run_test(write_sleep_read); + run_test(write_fork_read); + run_test(write_clone_read); + + } else { + putstr("# SME support not present\n"); + + for (i = 0; i < EXPECTED_TESTS; i++) { + putstr("ok "); + putnum(i); + putstr(" skipped, TPIDR2 not supported\n"); + } + + tests_skipped += EXPECTED_TESTS; + } + + print_summary(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile index 73e013c082a6..ccdac414ad94 100644 --- a/tools/testing/selftests/arm64/bti/Makefile +++ b/tools/testing/selftests/arm64/bti/Makefile @@ -10,7 +10,7 @@ PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS)) # cases for statically linked and dynamically lined binaries are # slightly different. -CFLAGS_NOBTI = -DBTI=0 +CFLAGS_NOBTI = -mbranch-protection=none -DBTI=0 CFLAGS_BTI = -mbranch-protection=standard -DBTI=1 CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS) @@ -39,7 +39,7 @@ BTI_OBJS = \ teststubs-bti.o \ trampoline-bti.o gen/btitest: $(BTI_OBJS) - $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^ NOBTI_OBJS = \ test-nobti.o \ @@ -50,7 +50,7 @@ NOBTI_OBJS = \ teststubs-nobti.o \ trampoline-nobti.o gen/nobtitest: $(NOBTI_OBJS) - $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^ # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list # to account for any OUTPUT target-dirs optionally provided by diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index c50d86331ed2..ea947af63882 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -1,8 +1,13 @@ fp-pidbench fpsimd-test +rdvl-sme rdvl-sve sve-probe-vls sve-ptrace sve-test +ssve-test vec-syscfg vlset +za-fork +za-ptrace +za-test diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 95f0b877a060..a7c2286bf65b 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -1,24 +1,42 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../../usr/include/ -TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg -TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \ - rdvl-sve \ - sve-test sve-stress \ +# A proper top_srcdir is needed by KSFT(lib.mk) +top_srcdir = $(realpath ../../../../../) + +CFLAGS += -I$(top_srcdir)/usr/include/ + +TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace +TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ + rdvl-sme rdvl-sve \ + sve-test \ + ssve-test \ + za-test \ vlset +TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress -all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) +EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o -fp-pidbench: fp-pidbench.S asm-utils.o +# Build with nolibc to avoid effects due to libc's clone() support +$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o + $(CC) -nostdlib $^ -o $@ +$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o $(CC) -nostdlib $^ -o $@ -fpsimd-test: fpsimd-test.o asm-utils.o +$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o +$(OUTPUT)/rdvl-sme: rdvl-sme.c $(OUTPUT)/rdvl.o +$(OUTPUT)/sve-ptrace: sve-ptrace.c +$(OUTPUT)/sve-probe-vls: sve-probe-vls.c $(OUTPUT)/rdvl.o +$(OUTPUT)/sve-test: sve-test.S $(OUTPUT)/asm-utils.o $(CC) -nostdlib $^ -o $@ -rdvl-sve: rdvl-sve.o rdvl.o -sve-ptrace: sve-ptrace.o -sve-probe-vls: sve-probe-vls.o rdvl.o -sve-test: sve-test.o asm-utils.o +$(OUTPUT)/ssve-test: sve-test.S $(OUTPUT)/asm-utils.o + $(CC) -DSSVE -nostdlib $^ -o $@ +$(OUTPUT)/vec-syscfg: vec-syscfg.c $(OUTPUT)/rdvl.o +$(OUTPUT)/vlset: vlset.c +$(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o + $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + -include ../../../../include/nolibc/nolibc.h \ + -static -ffreestanding -Wall $^ -o $@ +$(OUTPUT)/za-ptrace: za-ptrace.c +$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o $(CC) -nostdlib $^ -o $@ -vec-syscfg: vec-syscfg.o rdvl.o -vlset: vlset.o include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/rdvl-sme.c b/tools/testing/selftests/arm64/fp/rdvl-sme.c new file mode 100644 index 000000000000..49b0b2e08bac --- /dev/null +++ b/tools/testing/selftests/arm64/fp/rdvl-sme.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <stdio.h> + +#include "rdvl.h" + +int main(void) +{ + int vl = rdvl_sme(); + + printf("%d\n", vl); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S index c916c1c9defd..20dc29996dc6 100644 --- a/tools/testing/selftests/arm64/fp/rdvl.S +++ b/tools/testing/selftests/arm64/fp/rdvl.S @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only // Copyright (C) 2021 ARM Limited. +#include "sme-inst.h" + .arch_extension sve .globl rdvl_sve @@ -8,3 +10,11 @@ rdvl_sve: hint 34 // BTI C rdvl x0, #1 ret + +.globl rdvl_sme +rdvl_sme: + hint 34 // BTI C + + rdsvl 0, 1 + + ret diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h index 7c9d953fc9e7..5d323679fbc9 100644 --- a/tools/testing/selftests/arm64/fp/rdvl.h +++ b/tools/testing/selftests/arm64/fp/rdvl.h @@ -3,6 +3,7 @@ #ifndef RDVL_H #define RDVL_H +int rdvl_sme(void); int rdvl_sve(void); #endif diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h new file mode 100644 index 000000000000..7191e53ca1c0 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sme-inst.h @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021-2 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> + +#ifndef SME_INST_H +#define SME_INST_H + +/* + * RDSVL X\nx, #\imm + */ +.macro rdsvl nx, imm + .inst 0x4bf5800 \ + | (\imm << 5) \ + | (\nx) +.endm + +.macro smstop + msr S0_3_C4_C6_3, xzr +.endm + +.macro smstart_za + msr S0_3_C4_C5_3, xzr +.endm + +.macro smstart_sm + msr S0_3_C4_C3_3, xzr +.endm + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _ldr_za nw, nxbase, offset=0 + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _str_za nw, nxbase, offset=0 + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +#endif diff --git a/tools/testing/selftests/arm64/fp/ssve-stress b/tools/testing/selftests/arm64/fp/ssve-stress new file mode 100644 index 000000000000..e2bd2cc184ad --- /dev/null +++ b/tools/testing/selftests/arm64/fp/ssve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./ssve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 4c418b2021e0..8c4847977583 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -26,6 +26,10 @@ #define NT_ARM_SVE 0x405 #endif +#ifndef NT_ARM_SSVE +#define NT_ARM_SSVE 0x40b +#endif + struct vec_type { const char *name; unsigned long hwcap_type; @@ -42,11 +46,18 @@ static const struct vec_type vec_types[] = { .regset = NT_ARM_SVE, .prctl_set = PR_SVE_SET_VL, }, + { + .name = "Streaming SVE", + .hwcap_type = AT_HWCAP2, + .hwcap = HWCAP2_SME, + .regset = NT_ARM_SSVE, + .prctl_set = PR_SME_SET_VL, + }, }; -#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) +#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 4) #define FLAG_TESTS 2 -#define FPSIMD_TESTS 3 +#define FPSIMD_TESTS 2 #define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types)) @@ -78,6 +89,15 @@ static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); } +static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) +{ + struct iovec iov; + + iov.iov_base = fpsimd; + iov.iov_len = sizeof(*fpsimd); + return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov); +} + static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, void **buf, size_t *size) { @@ -240,28 +260,24 @@ static void check_u32(unsigned int vl, const char *reg, /* Access the FPSIMD registers via the SVE regset */ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) { - void *svebuf = NULL; - size_t svebufsz = 0; + void *svebuf; struct user_sve_header *sve; struct user_fpsimd_state *fpsimd, new_fpsimd; unsigned int i, j; unsigned char *p; + int ret; - /* New process should start with FPSIMD registers only */ - sve = get_sve(child, type, &svebuf, &svebufsz); - if (!sve) { - ksft_test_result_fail("get_sve(%s): %s\n", - type->name, strerror(errno)); - + svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD)); + if (!svebuf) { + ksft_test_result_fail("Failed to allocate FPSIMD buffer\n"); return; - } else { - ksft_test_result_pass("get_sve(%s FPSIMD)\n", type->name); } - ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Got FPSIMD registers via %s\n", type->name); - if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) - goto out; + memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD)); + sve = svebuf; + sve->flags = SVE_PT_REGS_FPSIMD; + sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD); + sve->vl = 16; /* We don't care what the VL is */ /* Try to set a known FPSIMD state via PT_REGS_SVE */ fpsimd = (struct user_fpsimd_state *)((char *)sve + @@ -273,12 +289,11 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) p[j] = j; } - if (set_sve(child, type, sve)) { - ksft_test_result_fail("set_sve(%s FPSIMD): %s\n", - type->name, strerror(errno)); - + ret = set_sve(child, type, sve); + ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n", + type->name, ret); + if (ret) goto out; - } /* Verify via the FPSIMD regset */ if (get_fpsimd(child, &new_fpsimd)) { @@ -395,7 +410,7 @@ out: free(write_buf); } -/* Validate attempting to set SVE data and read SVE data */ +/* Validate attempting to set SVE data and read it via the FPSIMD regset */ static void ptrace_set_sve_get_fpsimd_data(pid_t child, const struct vec_type *type, unsigned int vl) @@ -478,6 +493,115 @@ out: free(write_buf); } +/* Validate attempting to set FPSIMD data and read it via the SVE regset */ +static void ptrace_set_fpsimd_get_sve_data(pid_t child, + const struct vec_type *type, + unsigned int vl) +{ + void *read_buf = NULL; + unsigned char *p; + struct user_sve_header *read_sve; + unsigned int vq = sve_vq_from_vl(vl); + struct user_fpsimd_state write_fpsimd; + int ret, i, j; + size_t read_sve_size = 0; + size_t expected_size; + int errors = 0; + + if (__BYTE_ORDER == __BIG_ENDIAN) { + ksft_test_result_skip("Big endian not supported\n"); + return; + } + + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&write_fpsimd.vregs[i]; + + for (j = 0; j < sizeof(write_fpsimd.vregs[i]); ++j) + p[j] = j; + } + + ret = set_fpsimd(child, &write_fpsimd); + if (ret != 0) { + ksft_test_result_fail("Failed to set FPSIMD state: %d\n)", + ret); + return; + } + + if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) { + ksft_test_result_fail("Failed to read %s VL %u data\n", + type->name, vl); + return; + } + read_sve = read_buf; + + if (read_sve->vl != vl) { + ksft_test_result_fail("Child VL != expected VL %d\n", + read_sve->vl, vl); + goto out; + } + + /* The kernel may return either SVE or FPSIMD format */ + switch (read_sve->flags & SVE_PT_REGS_MASK) { + case SVE_PT_REGS_FPSIMD: + expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD); + if (read_sve_size < expected_size) { + ksft_test_result_fail("Read %d bytes, expected %d\n", + read_sve_size, expected_size); + goto out; + } + + ret = memcmp(&write_fpsimd, read_buf + SVE_PT_FPSIMD_OFFSET, + sizeof(write_fpsimd)); + if (ret != 0) { + ksft_print_msg("Read FPSIMD data mismatch\n"); + errors++; + } + break; + + case SVE_PT_REGS_SVE: + expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + if (read_sve_size < expected_size) { + ksft_test_result_fail("Read %d bytes, expected %d\n", + read_sve_size, expected_size); + goto out; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + __uint128_t tmp = 0; + + /* + * Z regs are stored endianness invariant, this won't + * work for big endian + */ + memcpy(&tmp, read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + sizeof(tmp)); + + if (tmp != write_fpsimd.vregs[i]) { + ksft_print_msg("Mismatch in FPSIMD for %s VL %u Z%d/V%d\n", + type->name, vl, i, i); + errors++; + } + } + + check_u32(vl, "FPSR", &write_fpsimd.fpsr, + read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors); + check_u32(vl, "FPCR", &write_fpsimd.fpcr, + read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors); + break; + default: + ksft_print_msg("Unexpected regs type %d\n", + read_sve->flags & SVE_PT_REGS_MASK); + errors++; + break; + } + + ksft_test_result(errors == 0, "Set FPSIMD, read via SVE for %s VL %u\n", + type->name, vl); + +out: + free(read_buf); +} + static int do_parent(pid_t child) { int ret = EXIT_FAILURE; @@ -548,11 +672,9 @@ static int do_parent(pid_t child) if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { ptrace_sve_fpsimd(child, &vec_types[i]); } else { - ksft_test_result_skip("%s FPSIMD get via SVE\n", - vec_types[i].name); ksft_test_result_skip("%s FPSIMD set via SVE\n", vec_types[i].name); - ksft_test_result_skip("%s set read via FPSIMD\n", + ksft_test_result_skip("%s FPSIMD read\n", vec_types[i].name); } @@ -585,11 +707,14 @@ static int do_parent(pid_t child) if (vl_supported) { ptrace_set_sve_get_sve_data(child, &vec_types[i], vl); ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl); + ptrace_set_fpsimd_get_sve_data(child, &vec_types[i], vl); } else { ksft_test_result_skip("%s set SVE get SVE for VL %d\n", vec_types[i].name, vl); ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n", vec_types[i].name, vl); + ksft_test_result_skip("%s set FPSIMD get SVE for VL %d\n", + vec_types[i].name, vl); } } } diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f5b1b48ffff2..589264231a2d 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -13,6 +13,7 @@ #include <asm/unistd.h> #include "assembler.h" #include "asm-offsets.h" +#include "sme-inst.h" #define NZR 32 #define NPR 16 @@ -156,6 +157,7 @@ endfunction // We fill the upper lanes of FFR with zeros. // Beware: corrupts P0. function setup_ffr +#ifndef SSVE mov x4, x30 and w0, w0, #0x3 @@ -178,6 +180,9 @@ function setup_ffr wrffr p0.b ret x4 +#else + ret +#endif endfunction // Trivial memory compare: compare x2 bytes starting at address x0 with @@ -260,6 +265,7 @@ endfunction // Beware -- corrupts P0. // Clobbers x0-x5. function check_ffr +#ifndef SSVE mov x3, x30 ldr x4, =scratch @@ -280,6 +286,9 @@ function check_ffr mov x2, x5 mov x30, x3 b memcmp +#else + ret +#endif endfunction // Any SVE register modified here can cause corruption in the main @@ -295,10 +304,12 @@ function irritator_handler movi v0.8b, #1 movi v9.16b, #2 movi v31.8b, #3 +#ifndef SSVE // And P0 rdffr p0.b // And FFR wrffr p15.b +#endif ret endfunction @@ -359,6 +370,11 @@ endfunction .globl _start function _start _start: +#ifdef SSVE + puts "Streaming mode " + smstart_sm +#endif + // Sanity-check and report the vector length rdvl x19, #8 @@ -407,6 +423,10 @@ _start: orr w2, w2, #SA_NODEFER bl setsignal +#ifdef SSVE + smstart_sm // syscalls will have exited streaming mode +#endif + mov x22, #0 // generation number, increments per iteration .Ltest_loop: rdvl x0, #8 diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index c90658811a83..9bcfcdc34ee9 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -51,6 +51,16 @@ static struct vec_data vec_data[] = { .prctl_set = PR_SVE_SET_VL, .default_vl_file = "/proc/sys/abi/sve_default_vector_length", }, + { + .name = "SME", + .hwcap_type = AT_HWCAP2, + .hwcap = HWCAP2_SME, + .rdvl = rdvl_sme, + .rdvl_binary = "./rdvl-sme", + .prctl_get = PR_SME_GET_VL, + .prctl_set = PR_SME_SET_VL, + .default_vl_file = "/proc/sys/abi/sme_default_vector_length", + }, }; static int stdio_read_integer(FILE *f, const char *what, int *val) diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c index 308d27a68226..76912a581a95 100644 --- a/tools/testing/selftests/arm64/fp/vlset.c +++ b/tools/testing/selftests/arm64/fp/vlset.c @@ -22,12 +22,15 @@ static int inherit = 0; static int no_inherit = 0; static int force = 0; static unsigned long vl; +static int set_ctl = PR_SVE_SET_VL; +static int get_ctl = PR_SVE_GET_VL; static const struct option options[] = { { "force", no_argument, NULL, 'f' }, { "inherit", no_argument, NULL, 'i' }, { "max", no_argument, NULL, 'M' }, { "no-inherit", no_argument, &no_inherit, 1 }, + { "sme", no_argument, NULL, 's' }, { "help", no_argument, NULL, '?' }, {} }; @@ -50,6 +53,9 @@ static int parse_options(int argc, char **argv) case 'M': vl = SVE_VL_MAX; break; case 'f': force = 1; break; case 'i': inherit = 1; break; + case 's': set_ctl = PR_SME_SET_VL; + get_ctl = PR_SME_GET_VL; + break; case 0: break; default: goto error; } @@ -125,14 +131,14 @@ int main(int argc, char **argv) if (inherit) flags |= PR_SVE_VL_INHERIT; - t = prctl(PR_SVE_SET_VL, vl | flags); + t = prctl(set_ctl, vl | flags); if (t < 0) { fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n", program_name, strerror(errno)); goto error; } - t = prctl(PR_SVE_GET_VL); + t = prctl(get_ctl); if (t == -1) { fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n", program_name, strerror(errno)); diff --git a/tools/testing/selftests/arm64/fp/za-fork-asm.S b/tools/testing/selftests/arm64/fp/za-fork-asm.S new file mode 100644 index 000000000000..2fafadd491c3 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-fork-asm.S @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. + +#include "sme-inst.h" + +.arch_extension sve + +#define MAGIC 42 + +#define MAXVL 2048 +#define MAXVL_B (MAXVL / 8) + +.pushsection .text +.data +.align 4 +scratch: + .space MAXVL_B +.popsection + +.globl fork_test +fork_test: + smstart_za + + // For simplicity just set one word in one vector, other tests + // cover general data corruption issues. + ldr x0, =scratch + mov x1, #MAGIC + str x1, [x0] + mov w12, wzr + _ldr_za 12, 0 // ZA.H[W12] loaded from [X0] + + // Tail call into the C portion that does the fork & verify + b fork_test_c + +.globl verify_fork +verify_fork: + // SVCR should have ZA=1, SM=0 + mrs x0, S3_3_C4_C2_2 + and x1, x0, #3 + cmp x1, #2 + beq 1f + mov x0, xzr + b 100f +1: + + // ZA should still have the value we loaded + ldr x0, =scratch + mov w12, wzr + _str_za 12, 0 // ZA.H[W12] stored to [X0] + ldr x1, [x0] + cmp x1, #MAGIC + beq 2f + mov x0, xzr + b 100f + +2: + // All tests passed + mov x0, #1 +100: + ret + diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c new file mode 100644 index 000000000000..ff475c649e96 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-fork.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ARM Limited. + * Original author: Mark Brown <broonie@kernel.org> + */ + +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/sched.h> +#include <linux/wait.h> + +#define EXPECTED_TESTS 1 + +static void putstr(const char *str) +{ + write(1, str, strlen(str)); +} + +static void putnum(unsigned int num) +{ + char c; + + if (num / 10) + putnum(num / 10); + + c = '0' + (num % 10); + write(1, &c, 1); +} + +static int tests_run; +static int tests_passed; +static int tests_failed; +static int tests_skipped; + +static void print_summary(void) +{ + if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) + putstr("# UNEXPECTED TEST COUNT: "); + + putstr("# Totals: pass:"); + putnum(tests_passed); + putstr(" fail:"); + putnum(tests_failed); + putstr(" xfail:0 xpass:0 skip:"); + putnum(tests_skipped); + putstr(" error:0\n"); +} + +int fork_test(void); +int verify_fork(void); + +/* + * If we fork the value in the parent should be unchanged and the + * child should start with the same value. This is called from the + * fork_test() asm function. + */ +int fork_test_c(void) +{ + pid_t newpid, waiting; + int child_status, parent_result; + + newpid = fork(); + if (newpid == 0) { + /* In child */ + if (!verify_fork()) { + putstr("# ZA state invalid in child\n"); + exit(0); + } else { + exit(1); + } + } + if (newpid < 0) { + putstr("# fork() failed: -"); + putnum(-newpid); + putstr("\n"); + return 0; + } + + parent_result = verify_fork(); + if (!parent_result) + putstr("# ZA state invalid in parent\n"); + + for (;;) { + waiting = waitpid(newpid, &child_status, 0); + + if (waiting < 0) { + if (errno == EINTR) + continue; + putstr("# waitpid() failed: "); + putnum(errno); + putstr("\n"); + return 0; + } + if (waiting != newpid) { + putstr("# waitpid() returned wrong PID\n"); + return 0; + } + + if (!WIFEXITED(child_status)) { + putstr("# child did not exit\n"); + return 0; + } + + return WEXITSTATUS(child_status) && parent_result; + } +} + +#define run_test(name) \ + if (name()) { \ + tests_passed++; \ + } else { \ + tests_failed++; \ + putstr("not "); \ + } \ + putstr("ok "); \ + putnum(++tests_run); \ + putstr(" " #name "\n"); + +int main(int argc, char **argv) +{ + int ret, i; + + putstr("TAP version 13\n"); + putstr("1.."); + putnum(EXPECTED_TESTS); + putstr("\n"); + + putstr("# PID: "); + putnum(getpid()); + putstr("\n"); + + /* + * This test is run with nolibc which doesn't support hwcap and + * it's probably disproportionate to implement so instead check + * for the default vector length configuration in /proc. + */ + ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); + if (ret >= 0) { + run_test(fork_test); + + } else { + putstr("# SME support not present\n"); + + for (i = 0; i < EXPECTED_TESTS; i++) { + putstr("ok "); + putnum(i); + putstr(" skipped\n"); + } + + tests_skipped += EXPECTED_TESTS; + } + + print_summary(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c new file mode 100644 index 000000000000..bf6158654056 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-ptrace.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Limited. + */ +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_ZA +#define NT_ARM_ZA 0x40c +#endif + +#define EXPECTED_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) + +static void fill_buf(char *buf, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + buf[i] = random(); +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size) +{ + struct user_za_header *za; + void *p; + size_t sz = sizeof(*za); + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov)) + goto error; + + za = *buf; + if (za->size <= sz) + break; + + sz = za->size; + } + + return za; + +error: + return NULL; +} + +static int set_za(pid_t pid, const struct user_za_header *za) +{ + struct iovec iov; + + iov.iov_base = (void *)za; + iov.iov_len = za->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov); +} + +/* Validate attempting to set the specfied VL via ptrace */ +static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) +{ + struct user_za_header za; + struct user_za_header *new_za = NULL; + size_t new_za_size = 0; + int ret, prctl_vl; + + *supported = false; + + /* Check if the VL is supported in this process */ + prctl_vl = prctl(PR_SME_SET_VL, vl); + if (prctl_vl == -1) + ksft_exit_fail_msg("prctl(PR_SME_SET_VL) failed: %s (%d)\n", + strerror(errno), errno); + + /* If the VL is not supported then a supported VL will be returned */ + *supported = (prctl_vl == vl); + + /* Set the VL by doing a set with no register payload */ + memset(&za, 0, sizeof(za)); + za.size = sizeof(za); + za.vl = vl; + ret = set_za(child, &za); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u\n", vl); + return; + } + + /* + * Read back the new register state and verify that we have the + * same VL that we got from prctl() on ourselves. + */ + if (!get_za(child, (void **)&new_za, &new_za_size)) { + ksft_test_result_fail("Failed to read VL %u\n", vl); + return; + } + + ksft_test_result(new_za->vl = prctl_vl, "Set VL %u\n", vl); + + free(new_za); +} + +/* Validate attempting to set no ZA data and read it back */ +static void ptrace_set_no_data(pid_t child, unsigned int vl) +{ + void *read_buf = NULL; + struct user_za_header write_za; + struct user_za_header *read_za; + size_t read_za_size = 0; + int ret; + + /* Set up some data and write it out */ + memset(&write_za, 0, sizeof(write_za)); + write_za.size = ZA_PT_ZA_OFFSET; + write_za.vl = vl; + + ret = set_za(child, &write_za); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u no data\n", vl); + return; + } + + /* Read the data back */ + if (!get_za(child, (void **)&read_buf, &read_za_size)) { + ksft_test_result_fail("Failed to read VL %u no data\n", vl); + return; + } + read_za = read_buf; + + /* We might read more data if there's extensions we don't know */ + if (read_za->size < write_za.size) { + ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n", + vl, write_za.size, read_za->size); + goto out_read; + } + + ksft_test_result(read_za->size == write_za.size, + "Disabled ZA for VL %u\n", vl); + +out_read: + free(read_buf); +} + +/* Validate attempting to set data and read it back */ +static void ptrace_set_get_data(pid_t child, unsigned int vl) +{ + void *write_buf; + void *read_buf = NULL; + struct user_za_header *write_za; + struct user_za_header *read_za; + size_t read_za_size = 0; + unsigned int vq = sve_vq_from_vl(vl); + int ret; + size_t data_size; + + data_size = ZA_PT_SIZE(vq); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + data_size, vl); + return; + } + write_za = write_buf; + + /* Set up some data and write it out */ + memset(write_za, 0, data_size); + write_za->size = data_size; + write_za->vl = vl; + + fill_buf(write_buf + ZA_PT_ZA_OFFSET, ZA_PT_ZA_SIZE(vq)); + + ret = set_za(child, write_za); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u data\n", vl); + goto out; + } + + /* Read the data back */ + if (!get_za(child, (void **)&read_buf, &read_za_size)) { + ksft_test_result_fail("Failed to read VL %u data\n", vl); + goto out; + } + read_za = read_buf; + + /* We might read more data if there's extensions we don't know */ + if (read_za->size < write_za->size) { + ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n", + vl, write_za->size, read_za->size); + goto out_read; + } + + ksft_test_result(memcmp(write_buf + ZA_PT_ZA_OFFSET, + read_buf + ZA_PT_ZA_OFFSET, + ZA_PT_ZA_SIZE(vq)) == 0, + "Data match for VL %u\n", vl); + +out_read: + free(read_buf); +out: + free(write_buf); +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + unsigned int vq, vl; + bool vl_supported; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + + /* Step through every possible VQ */ + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); + + /* First, try to set this vector length */ + ptrace_set_get_vl(child, vl, &vl_supported); + + /* If the VL is supported validate data set/get */ + if (vl_supported) { + ptrace_set_no_data(child, vl); + ptrace_set_get_data(child, vl); + } else { + ksft_test_result_skip("Disabled ZA for VL %u\n", vl); + ksft_test_result_skip("Get and set data for VL %u\n", + vl); + } + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + pid_t child; + + srandom(getpid()); + + ksft_print_header(); + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) { + ksft_set_plan(1); + ksft_exit_skip("SME not available\n"); + } + + ksft_set_plan(EXPECTED_TESTS); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return ret; +} diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress new file mode 100644 index 000000000000..5ac386b55b95 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./za-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S new file mode 100644 index 000000000000..9ab6f9cd9623 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Scalable Matrix Extension ZA context switch test +// Repeatedly writes unique test patterns into each ZA tile +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" +#include "sme-inst.h" + +.arch_extension sve + +#define MAXVL 2048 +#define MAXVL_B (MAXVL / 8) + +// Declare some storage space to shadow ZA register contents and a +// scratch buffer for a vector. +.pushsection .text +.data +.align 4 +zaref: + .space MAXVL_B * MAXVL_B +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in ZA +// x0: pid +// x1: row in ZA +// x2: generation + +// These values are used to constuct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:16 pid +// bits 15: 8 row number +// bits 7: 0 32-bit lane index + +function pattern + mov w3, wzr + bfi w3, w0, #16, #12 // PID + bfi w3, w1, #8, #8 // Row + bfi w3, w2, #28, #4 // Generation + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w3, [x0], #4 + add w3, w3, #1 // Lane + subs w1, w1, #1 + b.ne 0b + + ret +endfunction + +// Get the address of shadow data for ZA horizontal vector xn +.macro _adrza xd, xn, nrtmp + ldr \xd, =zaref + rdsvl \nrtmp, 1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a ZA horizontal vector +// x0: pid +// x1: row number +// x2: generation +function setup_za + mov x4, x30 + mov x12, x1 // Use x12 for vector select + + bl pattern // Get pattern in scratch buffer + _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5 + mov x5, x0 + ldr x1, =scratch + bl memcpy // length set up in x2 by _adrza + + _ldr_za 12, 5 // load vector w12 from pointer x5 + + ret x4 +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a ZA vector matches its shadow in memory, else abort +// x0: row number +// Clobbers x0-x7 and x12. +function check_za + mov x3, x30 + + mov x12, x0 + _adrza x5, x0, 6 // pointer to expected value in x5 + mov x4, x0 + ldr x7, =scratch // x7 is scratch + + mov x0, x7 // Poison scratch + mov x1, x6 + bl memfill_ae + + _str_za 12, 7 // save vector w12 to pointer x7 + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SME register modified here can cause corruption in the main +// thread -- but *only* the locations modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random ZA data +#if 0 + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 +#endif + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + puts "Streaming mode " + smstart_za + + // Sanity-check and report the vector length + + rdsvl 19, 8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdsvl 0, 8 + cmp x0, x19 + b.ne vl_barf + + rdsvl 21, 1 // Set up ZA & shadow with test pattern +0: mov x0, x20 + sub x1, x21, #1 + mov x2, x22 + bl setup_za + subs x21, x21, #1 + b.ne 0b + + and x8, x22, #127 // Every 128 interations... + cbz x8, 0f + mov x8, #__NR_getpid // (otherwise minimal syscall) + b 1f +0: + mov x8, #__NR_sched_yield // ...encourage preemption +1: + svc #0 + + mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0 + and x1, x0, #3 + cmp x1, #2 + b.ne svcr_barf + + rdsvl 21, 1 // Verify that the data made it through + rdsvl 24, 1 // Verify that the data made it through +0: sub x0, x24, x21 + bl check_za + subs x21, x21, #1 + bne 0b + + add x22, x22, #1 // Everything still working + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + smstop + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", row=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction + +function svcr_barf + mov x10, x0 + + puts "Bad SVCR: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore index d1fe4ddf1669..052d0f9f92b3 100644 --- a/tools/testing/selftests/arm64/mte/.gitignore +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -3,5 +3,6 @@ check_gcr_el1_cswitch check_tags_inclusion check_child_memory check_mmap_options +check_prctl check_ksm_options check_user_mem diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 43bd94f853ba..7597fc632cad 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -85,9 +85,9 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping) { char *ptr; int run, result; - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); for (run = 0; run < item; run++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, @@ -107,7 +107,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping) { char *ptr, *map_ptr; int run, fd, map_size, result = KSFT_PASS; - int total = sizeof(sizes)/sizeof(int); + int total = ARRAY_SIZE(sizes); mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); for (run = 0; run < total; run++) { @@ -144,7 +144,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping) int main(int argc, char *argv[]) { int err; - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); page_size = getpagesize(); if (!page_size) { diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c new file mode 100644 index 000000000000..f139a33a43ef --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2022 ARM Limited + +#include <stdbool.h> +#include <stdio.h> +#include <string.h> + +#include <sys/auxv.h> +#include <sys/prctl.h> + +#include <asm/hwcap.h> + +#include "kselftest.h" + +static int set_tagged_addr_ctrl(int val) +{ + int ret; + + ret = prctl(PR_SET_TAGGED_ADDR_CTRL, val, 0, 0, 0); + if (ret < 0) + ksft_print_msg("PR_SET_TAGGED_ADDR_CTRL: failed %d %d (%s)\n", + ret, errno, strerror(errno)); + return ret; +} + +static int get_tagged_addr_ctrl(void) +{ + int ret; + + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + if (ret < 0) + ksft_print_msg("PR_GET_TAGGED_ADDR_CTRL failed: %d %d (%s)\n", + ret, errno, strerror(errno)); + return ret; +} + +/* + * Read the current mode without having done any configuration, should + * run first. + */ +void check_basic_read(void) +{ + int ret; + + ret = get_tagged_addr_ctrl(); + if (ret < 0) { + ksft_test_result_fail("check_basic_read\n"); + return; + } + + if (ret & PR_MTE_TCF_SYNC) + ksft_print_msg("SYNC enabled\n"); + if (ret & PR_MTE_TCF_ASYNC) + ksft_print_msg("ASYNC enabled\n"); + + /* Any configuration is valid */ + ksft_test_result_pass("check_basic_read\n"); +} + +/* + * Attempt to set a specified combination of modes. + */ +void set_mode_test(const char *name, int hwcap2, int mask) +{ + int ret; + + if ((getauxval(AT_HWCAP2) & hwcap2) != hwcap2) { + ksft_test_result_skip("%s\n", name); + return; + } + + ret = set_tagged_addr_ctrl(mask); + if (ret < 0) { + ksft_test_result_fail("%s\n", name); + return; + } + + ret = get_tagged_addr_ctrl(); + if (ret < 0) { + ksft_test_result_fail("%s\n", name); + return; + } + + if ((ret & PR_MTE_TCF_MASK) == mask) { + ksft_test_result_pass("%s\n", name); + } else { + ksft_print_msg("Got %x, expected %x\n", + (ret & PR_MTE_TCF_MASK), mask); + ksft_test_result_fail("%s\n", name); + } +} + +struct mte_mode { + int mask; + int hwcap2; + const char *name; +} mte_modes[] = { + { PR_MTE_TCF_NONE, 0, "NONE" }, + { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" }, + { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" }, + { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" }, +}; + +int main(void) +{ + int i; + + ksft_print_header(); + ksft_set_plan(5); + + check_basic_read(); + for (i = 0; i < ARRAY_SIZE(mte_modes); i++) + set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, + mte_modes[i].mask); + + ksft_print_cnts(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index deaef1f61076..2b1425b92b69 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -23,10 +23,13 @@ static int verify_mte_pointer_validity(char *ptr, int mode) { mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); /* Check the validity of the tagged pointer */ - memset((void *)ptr, '1', BUFFER_SIZE); + memset(ptr, '1', BUFFER_SIZE); mte_wait_after_trig(); - if (cur_mte_cxt.fault_valid) + if (cur_mte_cxt.fault_valid) { + ksft_print_msg("Unexpected fault recorded for %p-%p in mode %x\n", + ptr, ptr + BUFFER_SIZE, mode); return KSFT_FAIL; + } /* Proceed further for nonzero tags */ if (!MT_FETCH_TAG((uintptr_t)ptr)) return KSFT_PASS; @@ -34,27 +37,32 @@ static int verify_mte_pointer_validity(char *ptr, int mode) /* Check the validity outside the range */ ptr[BUFFER_SIZE] = '2'; mte_wait_after_trig(); - if (!cur_mte_cxt.fault_valid) + if (!cur_mte_cxt.fault_valid) { + ksft_print_msg("No valid fault recorded for %p in mode %x\n", + ptr, mode); return KSFT_FAIL; - else + } else { return KSFT_PASS; + } } static int check_single_included_tags(int mem_type, int mode) { char *ptr; - int tag, run, result = KSFT_PASS; + int tag, run, ret, result = KSFT_PASS; - ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, false) != KSFT_PASS) return KSFT_FAIL; for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { - mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + if (ret != 0) + result = KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { - ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", @@ -66,7 +74,7 @@ static int check_single_included_tags(int mem_type, int mode) result = verify_mte_pointer_validity(ptr, mode); } } - mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); return result; } @@ -76,7 +84,7 @@ static int check_multiple_included_tags(int mem_type, int mode) int tag, run, result = KSFT_PASS; unsigned long excl_mask = 0; - ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, false) != KSFT_PASS) return KSFT_FAIL; @@ -86,7 +94,7 @@ static int check_multiple_included_tags(int mem_type, int mode) mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); /* Try to catch a excluded tag by a number of tries. */ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { - ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", @@ -98,21 +106,23 @@ static int check_multiple_included_tags(int mem_type, int mode) result = verify_mte_pointer_validity(ptr, mode); } } - mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); return result; } static int check_all_included_tags(int mem_type, int mode) { char *ptr; - int run, result = KSFT_PASS; + int run, ret, result = KSFT_PASS; - ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, false) != KSFT_PASS) return KSFT_FAIL; - mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + if (ret != 0) + return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); @@ -122,20 +132,22 @@ static int check_all_included_tags(int mem_type, int mode) */ result = verify_mte_pointer_validity(ptr, mode); } - mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); return result; } static int check_none_included_tags(int mem_type, int mode) { char *ptr; - int run; + int run, ret; - ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false); + ptr = mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false); if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) return KSFT_FAIL; - mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + if (ret != 0) + return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ for (run = 0; run < RUNS; run++) { ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); @@ -147,12 +159,12 @@ static int check_none_included_tags(int mem_type, int mode) } mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); /* Check the write validity of the untagged pointer */ - memset((void *)ptr, '1', BUFFER_SIZE); + memset(ptr, '1', BUFFER_SIZE); mte_wait_after_trig(); if (cur_mte_cxt.fault_valid) break; } - mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false); + mte_free_memory(ptr, BUFFER_SIZE, mem_type, false); if (cur_mte_cxt.fault_valid) return KSFT_FAIL; else diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 0328a1e08f65..00ffd34c66d3 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -37,6 +37,10 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) if (si->si_code == SEGV_MTEAERR) { if (cur_mte_cxt.trig_si_code == si->si_code) cur_mte_cxt.fault_valid = true; + else + ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=$lx, fault addr=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, + addr); return; } /* Compare the context for precise error */ @@ -124,13 +128,16 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, int prot_flag, map_flag; size_t entire_size = size + range_before + range_after; - if (mem_type != USE_MALLOC && mem_type != USE_MMAP && - mem_type != USE_MPROTECT) { + switch (mem_type) { + case USE_MALLOC: + return malloc(entire_size) + range_before; + case USE_MMAP: + case USE_MPROTECT: + break; + default: ksft_print_msg("FAIL: Invalid allocate request\n"); return NULL; } - if (mem_type == USE_MALLOC) - return malloc(entire_size) + range_before; prot_flag = PROT_READ | PROT_WRITE; if (mem_type == USE_MMAP) @@ -269,18 +276,33 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) { unsigned long en = 0; - if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR || - mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) { - ksft_print_msg("FAIL: Invalid mte config option\n"); + switch (mte_option) { + case MTE_NONE_ERR: + case MTE_SYNC_ERR: + case MTE_ASYNC_ERR: + break; + default: + ksft_print_msg("FAIL: Invalid MTE option %x\n", mte_option); + return -EINVAL; + } + + if (incl_mask & ~MT_INCLUDE_TAG_MASK) { + ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask); return -EINVAL; } + en = PR_TAGGED_ADDR_ENABLE; - if (mte_option == MTE_SYNC_ERR) + switch (mte_option) { + case MTE_SYNC_ERR: en |= PR_MTE_TCF_SYNC; - else if (mte_option == MTE_ASYNC_ERR) + break; + case MTE_ASYNC_ERR: en |= PR_MTE_TCF_ASYNC; - else if (mte_option == MTE_NONE_ERR) + break; + case MTE_NONE_ERR: en |= PR_MTE_TCF_NONE; + break; + } en |= (incl_mask << PR_MTE_TAG_SHIFT); /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 195a7d1879e6..2d3e71724e55 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -75,10 +75,21 @@ unsigned int mte_get_pstate_tco(void); /* Test framework static inline functions/macros */ static inline void evaluate_test(int err, const char *msg) { - if (err == KSFT_PASS) + switch (err) { + case KSFT_PASS: ksft_test_result_pass(msg); - else if (err == KSFT_FAIL) + break; + case KSFT_FAIL: ksft_test_result_fail(msg); + break; + case KSFT_SKIP: + ksft_test_result_skip(msg); + break; + default: + ksft_test_result_error("Unknown return code %d from %s", + err, msg); + break; + } } static inline int check_allocated_memory(void *ptr, size_t size, diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index c1742755abb9..e8d2b57f73ec 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only mangle_* fake_sigreturn_* +sme_* +ssve_* sve_* +za_* !*.[ch] diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index f909b70d9e98..c70fdec7d7c4 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -34,11 +34,15 @@ enum { FSSBS_BIT, FSVE_BIT, + FSME_BIT, + FSME_FA64_BIT, FMAX_END }; #define FEAT_SSBS (1UL << FSSBS_BIT) #define FEAT_SVE (1UL << FSVE_BIT) +#define FEAT_SME (1UL << FSME_BIT) +#define FEAT_SME_FA64 (1UL << FSME_FA64_BIT) /* * A descriptor used to describe and configure a test case. diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 5743897984b0..b588d10afd5b 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -27,6 +27,8 @@ static int sig_copyctx = SIGTRAP; static char const *const feats_names[FMAX_END] = { " SSBS ", " SVE ", + " SME ", + " FA64 ", }; #define MAX_FEATS_SZ 128 @@ -268,6 +270,10 @@ int test_init(struct tdescr *td) td->feats_supported |= FEAT_SSBS; if (getauxval(AT_HWCAP) & HWCAP_SVE) td->feats_supported |= FEAT_SVE; + if (getauxval(AT_HWCAP2) & HWCAP2_SME) + td->feats_supported |= FEAT_SME; + if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) + td->feats_supported |= FEAT_SME_FA64; if (feats_ok(td)) { if (td->feats_required & td->feats_supported) fprintf(stderr, diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c new file mode 100644 index 000000000000..7ed762b7202f --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Attempt to change the streaming SVE vector length in a signal + * handler, this is not supported and is expected to segfault. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; +static unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls = 0; + +static bool sme_get_vls(struct tdescr *td) +{ + int vq, vl; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + return false; + + vl &= PR_SME_VL_LEN_MASK; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + /* We need at least two VLs */ + if (nvls < 2) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return false; + } + + return true; +} + +static int fake_sigreturn_ssve_change_vl(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, offset; + struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + struct sve_context *sve; + + /* Get a signal context with a SME ZA frame in it */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + head = get_header(head, SVE_MAGIC, resv_sz, &offset); + if (!head) { + fprintf(stderr, "No SVE context\n"); + return 1; + } + + if (head->size != sizeof(struct sve_context)) { + fprintf(stderr, "Register data present, aborting\n"); + return 1; + } + + sve = (struct sve_context *)head; + + /* No changes are supported; init left us at minimum VL so go to max */ + fprintf(stderr, "Attempting to change VL from %d to %d\n", + sve->vl, vls[0]); + sve->vl = vls[0]; + + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_SSVE_CHANGE", + .descr = "Attempt to change Streaming SVE VL", + .feats_required = FEAT_SME, + .sig_ok = SIGSEGV, + .timeout = 3, + .init = sme_get_vls, + .run = fake_sigreturn_ssve_change_vl, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c new file mode 100644 index 000000000000..f9d76ae32bba --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that using a streaming mode instruction without enabling it + * generates a SIGILL. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +int sme_trap_no_sm_trigger(struct tdescr *td) +{ + /* SMSTART ZA ; ADDHA ZA0.S, P0/M, P0/M, Z0.S */ + asm volatile(".inst 0xd503457f ; .inst 0xc0900000"); + + return 0; +} + +int sme_trap_no_sm_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + return 1; +} + +struct tdescr tde = { + .name = "SME trap without SM", + .descr = "Check that we get a SIGILL if we use streaming mode without enabling it", + .timeout = 3, + .feats_required = FEAT_SME, /* We need a SMSTART ZA */ + .sanity_disabled = true, + .trigger = sme_trap_no_sm_trigger, + .run = sme_trap_no_sm_run, + .sig_ok = SIGILL, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c new file mode 100644 index 000000000000..e469ae5348e3 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that using an instruction not supported in streaming mode + * traps when in streaming mode. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +int sme_trap_non_streaming_trigger(struct tdescr *td) +{ + /* + * The framework will handle SIGILL so we need to exit SM to + * stop any other code triggering a further SIGILL down the + * line from using a streaming-illegal instruction. + */ + asm volatile(".inst 0xd503437f; /* SMSTART ZA */ \ + cnt v0.16b, v0.16b; \ + .inst 0xd503447f /* SMSTOP ZA */"); + + return 0; +} + +int sme_trap_non_streaming_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + return 1; +} + +struct tdescr tde = { + .name = "SME SM trap unsupported instruction", + .descr = "Check that we get a SIGILL if we use an unsupported instruction in streaming mode", + .feats_required = FEAT_SME, + .feats_incompatible = FEAT_SME_FA64, + .timeout = 3, + .sanity_disabled = true, + .trigger = sme_trap_non_streaming_trigger, + .run = sme_trap_non_streaming_run, + .sig_ok = SIGILL, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c new file mode 100644 index 000000000000..3a7747af4715 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that accessing ZA without enabling it generates a SIGILL. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +int sme_trap_za_trigger(struct tdescr *td) +{ + /* ZERO ZA */ + asm volatile(".inst 0xc00800ff"); + + return 0; +} + +int sme_trap_za_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + return 1; +} + +struct tdescr tde = { + .name = "SME ZA trap", + .descr = "Check that we get a SIGILL if we access ZA without enabling", + .timeout = 3, + .sanity_disabled = true, + .trigger = sme_trap_za_trigger, + .run = sme_trap_za_run, + .sig_ok = SIGILL, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c new file mode 100644 index 000000000000..13ff3b35cbaf --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Check that the SME vector length reported in signal contexts is the + * expected one. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; +unsigned int vl; + +static bool get_sme_vl(struct tdescr *td) +{ + int ret = prctl(PR_SME_GET_VL); + if (ret == -1) + return false; + + vl = ret; + + return true; +} + +static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, offset; + struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + struct za_context *za; + + /* Get a signal context which should have a ZA frame in it */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + head = get_header(head, ZA_MAGIC, resv_sz, &offset); + if (!head) { + fprintf(stderr, "No ZA context\n"); + return 1; + } + za = (struct za_context *)head; + + if (za->vl != vl) { + fprintf(stderr, "ZA sigframe VL %u, expected %u\n", + za->vl, vl); + return 1; + } else { + fprintf(stderr, "got expected VL %u\n", vl); + } + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "SME VL", + .descr = "Check that we get the right SME VL reported", + .feats_required = FEAT_SME, + .timeout = 3, + .init = get_sme_vl, + .run = sme_vl, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c new file mode 100644 index 000000000000..9022a6cab4b3 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that the streaming SVE register context in signal frames is + * set up as expected. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; +static unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls = 0; + +static bool sme_get_vls(struct tdescr *td) +{ + int vq, vl; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SME_SET_VL, vq * 16); + if (vl == -1) + return false; + + vl &= PR_SME_VL_LEN_MASK; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + /* We need at least one VL */ + if (nvls < 1) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return false; + } + + return true; +} + +static void setup_ssve_regs(void) +{ + /* smstart sm; real data is TODO */ + asm volatile(".inst 0xd503437f" : : : ); +} + +static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, + unsigned int vl) +{ + size_t resv_sz, offset; + struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + struct sve_context *ssve; + int ret; + + fprintf(stderr, "Testing VL %d\n", vl); + + ret = prctl(PR_SME_SET_VL, vl); + if (ret != vl) { + fprintf(stderr, "Failed to set VL, got %d\n", ret); + return 1; + } + + /* + * Get a signal context which should have a SVE frame and registers + * in it. + */ + setup_ssve_regs(); + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + head = get_header(head, SVE_MAGIC, resv_sz, &offset); + if (!head) { + fprintf(stderr, "No SVE context\n"); + return 1; + } + + ssve = (struct sve_context *)head; + if (ssve->vl != vl) { + fprintf(stderr, "Got VL %d, expected %d\n", ssve->vl, vl); + return 1; + } + + /* The actual size validation is done in get_current_context() */ + fprintf(stderr, "Got expected size %u and VL %d\n", + head->size, ssve->vl); + + return 0; +} + +static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + int i; + + for (i = 0; i < nvls; i++) { + /* + * TODO: the signal test helpers can't currently cope + * with signal frames bigger than struct sigcontext, + * skip VLs that will trigger that. + */ + if (vls[i] > 64) { + printf("Skipping VL %u due to stack size\n", vls[i]); + continue; + } + + if (do_one_sme_vl(td, si, uc, vls[i])) + return 1; + } + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "Streaming SVE registers", + .descr = "Check that we get the right Streaming SVE registers reported", + /* + * We shouldn't require FA64 but things like memset() used in the + * helpers might use unsupported instructions so for now disable + * the test unless we've got the full instruction set. + */ + .feats_required = FEAT_SME | FEAT_SME_FA64, + .timeout = 3, + .init = sme_get_vls, + .run = sme_regs, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 8c2a57fc2f9c..84c36bee4d82 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -75,6 +75,31 @@ bool validate_sve_context(struct sve_context *sve, char **err) return true; } +bool validate_za_context(struct za_context *za, char **err) +{ + /* Size will be rounded up to a multiple of 16 bytes */ + size_t regs_size + = ((ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl)) + 15) / 16) * 16; + + if (!za || !err) + return false; + + /* Either a bare za_context or a za_context followed by regs data */ + if ((za->head.size != sizeof(struct za_context)) && + (za->head.size != regs_size)) { + *err = "bad size for ZA context"; + return false; + } + + if (!sve_vl_valid(za->vl)) { + *err = "SME VL in ZA context invalid"; + + return false; + } + + return true; +} + bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) { bool terminated = false; @@ -82,6 +107,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) int flags = 0; struct extra_context *extra = NULL; struct sve_context *sve = NULL; + struct za_context *za = NULL; struct _aarch64_ctx *head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; @@ -120,6 +146,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) sve = (struct sve_context *)head; flags |= SVE_CTX; break; + case ZA_MAGIC: + if (flags & ZA_CTX) + *err = "Multiple ZA_MAGIC"; + /* Size is validated in validate_za_context() */ + za = (struct za_context *)head; + flags |= ZA_CTX; + break; case EXTRA_MAGIC: if (flags & EXTRA_CTX) *err = "Multiple EXTRA_MAGIC"; @@ -165,6 +198,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) if (flags & SVE_CTX) if (!validate_sve_context(sve, err)) return false; + if (flags & ZA_CTX) + if (!validate_za_context(za, err)) + return false; head = GET_RESV_NEXT_HEAD(head); } diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index ad884c135314..49f1d5de7b5b 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -16,7 +16,8 @@ #define FPSIMD_CTX (1 << 0) #define SVE_CTX (1 << 1) -#define EXTRA_CTX (1 << 2) +#define ZA_CTX (1 << 2) +#define EXTRA_CTX (1 << 3) #define KSFT_BAD_MAGIC 0xdeadbeef diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c new file mode 100644 index 000000000000..b94e4f99fcac --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that the ZA register context in signal frames is set up as + * expected. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; +static unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls = 0; + +static bool sme_get_vls(struct tdescr *td) +{ + int vq, vl; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + return false; + + vl &= PR_SME_VL_LEN_MASK; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + /* We need at least one VL */ + if (nvls < 1) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return false; + } + + return true; +} + +static void setup_za_regs(void) +{ + /* smstart za; real data is TODO */ + asm volatile(".inst 0xd503457f" : : : ); +} + +static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, + unsigned int vl) +{ + size_t resv_sz, offset; + struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + struct za_context *za; + + fprintf(stderr, "Testing VL %d\n", vl); + + if (prctl(PR_SME_SET_VL, vl) != vl) { + fprintf(stderr, "Failed to set VL\n"); + return 1; + } + + /* + * Get a signal context which should have a SVE frame and registers + * in it. + */ + setup_za_regs(); + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + head = get_header(head, ZA_MAGIC, resv_sz, &offset); + if (!head) { + fprintf(stderr, "No ZA context\n"); + return 1; + } + + za = (struct za_context *)head; + if (za->vl != vl) { + fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl); + return 1; + } + + /* The actual size validation is done in get_current_context() */ + fprintf(stderr, "Got expected size %u and VL %d\n", + head->size, za->vl); + + return 0; +} + +static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + int i; + + for (i = 0; i < nvls; i++) { + /* + * TODO: the signal test helpers can't currently cope + * with signal frames bigger than struct sigcontext, + * skip VLs that will trigger that. + */ + if (vls[i] > 32) { + printf("Skipping VL %u due to stack size\n", vls[i]); + continue; + } + + if (do_one_sme_vl(td, si, uc, vls[i])) + return 1; + } + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "ZA register", + .descr = "Check that we get the right ZA registers reported", + .feats_required = FEAT_SME, + .timeout = 3, + .init = sme_get_vls, + .run = sme_regs, +}; diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index be9643ef6285..306ee1b01e72 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -4,3 +4,4 @@ test_core test_freezer test_kmem test_kill +test_cpu diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 745fe25fa0b9..478217cc1371 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -10,6 +10,7 @@ TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_kill +TEST_GEN_PROGS += test_cpu LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -20,3 +21,4 @@ $(OUTPUT)/test_kmem: cgroup_util.c $(OUTPUT)/test_core: cgroup_util.c $(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_kill: cgroup_util.c +$(OUTPUT)/test_cpu: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index dbaa7aabbb4a..4297d580e3f8 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -190,6 +190,18 @@ int cg_write(const char *cgroup, const char *control, char *buf) return -1; } +int cg_write_numeric(const char *cgroup, const char *control, long value) +{ + char buf[64]; + int ret; + + ret = sprintf(buf, "%lu", value); + if (ret < 0) + return ret; + + return cg_write(cgroup, control, buf); +} + int cg_find_unified_root(char *root, size_t len) { char buf[10 * PAGE_SIZE]; diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 628738532ac9..2ee2119281d7 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -8,6 +8,9 @@ #define MB(x) (x << 20) +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L + /* * Checks if two given values differ by less than err% of their sum. */ @@ -32,6 +35,7 @@ extern long cg_read_long(const char *cgroup, const char *control); long cg_read_key_long(const char *cgroup, const char *control, const char *key); extern long cg_read_lc(const char *cgroup, const char *control); extern int cg_write(const char *cgroup, const char *control, char *buf); +int cg_write_numeric(const char *cgroup, const char *control, long value); extern int cg_run(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg); diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config new file mode 100644 index 000000000000..84fe884fad86 --- /dev/null +++ b/tools/testing/selftests/cgroup/config @@ -0,0 +1,8 @@ +CONFIG_CGROUPS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_SCHED=y +CONFIG_MEMCG=y +CONFIG_MEMCG_KMEM=y +CONFIG_MEMCG_SWAP=y +CONFIG_PAGE_COUNTER=y diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c new file mode 100644 index 000000000000..24020a2c68dc --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -0,0 +1,726 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <linux/limits.h> +#include <sys/sysinfo.h> +#include <sys/wait.h> +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <time.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +enum hog_clock_type { + // Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock. + CPU_HOG_CLOCK_PROCESS, + // Count elapsed time using system wallclock time. + CPU_HOG_CLOCK_WALL, +}; + +struct cpu_hogger { + char *cgroup; + pid_t pid; + long usage; +}; + +struct cpu_hog_func_param { + int nprocs; + struct timespec ts; + enum hog_clock_type clock_type; +}; + +/* + * This test creates two nested cgroups with and without enabling + * the cpu controller. + */ +static int test_cpucg_subtree_control(const char *root) +{ + char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL; + int ret = KSFT_FAIL; + + // Create two nested cgroups with the cpu controller enabled. + parent = cg_name(root, "cpucg_test_0"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + child = cg_name(parent, "cpucg_test_child"); + if (!child) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_read_strstr(child, "cgroup.controllers", "cpu")) + goto cleanup; + + // Create two nested cgroups without enabling the cpu controller. + parent2 = cg_name(root, "cpucg_test_1"); + if (!parent2) + goto cleanup; + + if (cg_create(parent2)) + goto cleanup; + + child2 = cg_name(parent2, "cpucg_test_child"); + if (!child2) + goto cleanup; + + if (cg_create(child2)) + goto cleanup; + + if (!cg_read_strstr(child2, "cgroup.controllers", "cpu")) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(child); + free(child); + cg_destroy(child2); + free(child2); + cg_destroy(parent); + free(parent); + cg_destroy(parent2); + free(parent2); + + return ret; +} + +static void *hog_cpu_thread_func(void *arg) +{ + while (1) + ; + + return NULL; +} + +static struct timespec +timespec_sub(const struct timespec *lhs, const struct timespec *rhs) +{ + struct timespec zero = { + .tv_sec = 0, + .tv_nsec = 0, + }; + struct timespec ret; + + if (lhs->tv_sec < rhs->tv_sec) + return zero; + + ret.tv_sec = lhs->tv_sec - rhs->tv_sec; + + if (lhs->tv_nsec < rhs->tv_nsec) { + if (ret.tv_sec == 0) + return zero; + + ret.tv_sec--; + ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec; + } else + ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec; + + return ret; +} + +static int hog_cpus_timed(const char *cgroup, void *arg) +{ + const struct cpu_hog_func_param *param = + (struct cpu_hog_func_param *)arg; + struct timespec ts_run = param->ts; + struct timespec ts_remaining = ts_run; + struct timespec ts_start; + int i, ret; + + ret = clock_gettime(CLOCK_MONOTONIC, &ts_start); + if (ret != 0) + return ret; + + for (i = 0; i < param->nprocs; i++) { + pthread_t tid; + + ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL); + if (ret != 0) + return ret; + } + + while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) { + struct timespec ts_total; + + ret = nanosleep(&ts_remaining, NULL); + if (ret && errno != EINTR) + return ret; + + if (param->clock_type == CPU_HOG_CLOCK_PROCESS) { + ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total); + if (ret != 0) + return ret; + } else { + struct timespec ts_current; + + ret = clock_gettime(CLOCK_MONOTONIC, &ts_current); + if (ret != 0) + return ret; + + ts_total = timespec_sub(&ts_current, &ts_start); + } + + ts_remaining = timespec_sub(&ts_run, &ts_total); + } + + return 0; +} + +/* + * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that + * cpu.stat shows the expected output. + */ +static int test_cpucg_stats(const char *root) +{ + int ret = KSFT_FAIL; + long usage_usec, user_usec, system_usec; + long usage_seconds = 2; + long expected_usage_usec = usage_seconds * USEC_PER_SEC; + char *cpucg; + + cpucg = cg_name(root, "cpucg_test"); + if (!cpucg) + goto cleanup; + + if (cg_create(cpucg)) + goto cleanup; + + usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec"); + if (usage_usec != 0 || user_usec != 0 || system_usec != 0) + goto cleanup; + + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_PROCESS, + }; + if (cg_run(cpucg, hog_cpus_timed, (void *)¶m)) + goto cleanup; + + usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + if (user_usec <= 0) + goto cleanup; + + if (!values_close(usage_usec, expected_usage_usec, 1)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(cpucg); + free(cpucg); + + return ret; +} + +static int +run_cpucg_weight_test( + const char *root, + pid_t (*spawn_child)(const struct cpu_hogger *child), + int (*validate)(const struct cpu_hogger *children, int num_children)) +{ + int ret = KSFT_FAIL, i; + char *parent = NULL; + struct cpu_hogger children[3] = {NULL}; + + parent = cg_name(root, "cpucg_test_0"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) { + children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i); + if (!children[i].cgroup) + goto cleanup; + + if (cg_create(children[i].cgroup)) + goto cleanup; + + if (cg_write_numeric(children[i].cgroup, "cpu.weight", + 50 * (i + 1))) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) { + pid_t pid = spawn_child(&children[i]); + if (pid <= 0) + goto cleanup; + children[i].pid = pid; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) { + int retcode; + + waitpid(children[i].pid, &retcode, 0); + if (!WIFEXITED(retcode)) + goto cleanup; + if (WEXITSTATUS(retcode)) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) + children[i].usage = cg_read_key_long(children[i].cgroup, + "cpu.stat", "usage_usec"); + + if (validate(children, ARRAY_SIZE(children))) + goto cleanup; + + ret = KSFT_PASS; +cleanup: + for (i = 0; i < ARRAY_SIZE(children); i++) { + cg_destroy(children[i].cgroup); + free(children[i].cgroup); + } + cg_destroy(parent); + free(parent); + + return ret; +} + +static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus) +{ + long usage_seconds = 10; + struct cpu_hog_func_param param = { + .nprocs = ncpus, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)¶m); +} + +static pid_t weight_hog_all_cpus(const struct cpu_hogger *child) +{ + return weight_hog_ncpus(child, get_nprocs()); +} + +static int +overprovision_validate(const struct cpu_hogger *children, int num_children) +{ + int ret = KSFT_FAIL, i; + + for (i = 0; i < num_children - 1; i++) { + long delta; + + if (children[i + 1].usage <= children[i].usage) + goto cleanup; + + delta = children[i + 1].usage - children[i].usage; + if (!values_close(delta, children[0].usage, 35)) + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + return ret; +} + +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 50 + * A/C cpu.weight = 100 + * A/D cpu.weight = 150 + * + * A separate process is then created for each child cgroup which spawns as + * many threads as there are cores, and hogs each CPU as much as possible + * for some time interval. + * + * Once all of the children have exited, we verify that each child cgroup + * was given proportional runtime as informed by their cpu.weight. + */ +static int test_cpucg_weight_overprovisioned(const char *root) +{ + return run_cpucg_weight_test(root, weight_hog_all_cpus, + overprovision_validate); +} + +static pid_t weight_hog_one_cpu(const struct cpu_hogger *child) +{ + return weight_hog_ncpus(child, 1); +} + +static int +underprovision_validate(const struct cpu_hogger *children, int num_children) +{ + int ret = KSFT_FAIL, i; + + for (i = 0; i < num_children - 1; i++) { + if (!values_close(children[i + 1].usage, children[0].usage, 15)) + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + return ret; +} + +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 50 + * A/C cpu.weight = 100 + * A/D cpu.weight = 150 + * + * A separate process is then created for each child cgroup which spawns a + * single thread that hogs a CPU. The testcase is only run on systems that + * have at least one core per-thread in the child processes. + * + * Once all of the children have exited, we verify that each child cgroup + * had roughly the same runtime despite having different cpu.weight. + */ +static int test_cpucg_weight_underprovisioned(const char *root) +{ + // Only run the test if there are enough cores to avoid overprovisioning + // the system. + if (get_nprocs() < 4) + return KSFT_SKIP; + + return run_cpucg_weight_test(root, weight_hog_one_cpu, + underprovision_validate); +} + +static int +run_cpucg_nested_weight_test(const char *root, bool overprovisioned) +{ + int ret = KSFT_FAIL, i; + char *parent = NULL, *child = NULL; + struct cpu_hogger leaf[3] = {NULL}; + long nested_leaf_usage, child_usage; + int nprocs = get_nprocs(); + + if (!overprovisioned) { + if (nprocs < 4) + /* + * Only run the test if there are enough cores to avoid overprovisioning + * the system. + */ + return KSFT_SKIP; + nprocs /= 4; + } + + parent = cg_name(root, "cpucg_test"); + child = cg_name(parent, "cpucg_child"); + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + if (cg_write(child, "cgroup.subtree_control", "+cpu")) + goto cleanup; + if (cg_write(child, "cpu.weight", "1000")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + const char *ancestor; + long weight; + + if (i == 0) { + ancestor = parent; + weight = 1000; + } else { + ancestor = child; + weight = 5000; + } + leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i); + if (!leaf[i].cgroup) + goto cleanup; + + if (cg_create(leaf[i].cgroup)) + goto cleanup; + + if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight)) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + pid_t pid; + struct cpu_hog_func_param param = { + .nprocs = nprocs, + .ts = { + .tv_sec = 10, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + + pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed, + (void *)¶m); + if (pid <= 0) + goto cleanup; + leaf[i].pid = pid; + } + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + int retcode; + + waitpid(leaf[i].pid, &retcode, 0); + if (!WIFEXITED(retcode)) + goto cleanup; + if (WEXITSTATUS(retcode)) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + leaf[i].usage = cg_read_key_long(leaf[i].cgroup, + "cpu.stat", "usage_usec"); + if (leaf[i].usage <= 0) + goto cleanup; + } + + nested_leaf_usage = leaf[1].usage + leaf[2].usage; + if (overprovisioned) { + if (!values_close(leaf[0].usage, nested_leaf_usage, 15)) + goto cleanup; + } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15)) + goto cleanup; + + + child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec"); + if (child_usage <= 0) + goto cleanup; + if (!values_close(child_usage, nested_leaf_usage, 1)) + goto cleanup; + + ret = KSFT_PASS; +cleanup: + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + cg_destroy(leaf[i].cgroup); + free(leaf[i].cgroup); + } + cg_destroy(child); + free(child); + cg_destroy(parent); + free(parent); + + return ret; +} + +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 1000 + * A/C cpu.weight = 1000 + * A/C/D cpu.weight = 5000 + * A/C/E cpu.weight = 5000 + * + * A separate process is then created for each leaf, which spawn nproc threads + * that burn a CPU for a few seconds. + * + * Once all of those processes have exited, we verify that each of the leaf + * cgroups have roughly the same usage from cpu.stat. + */ +static int +test_cpucg_nested_weight_overprovisioned(const char *root) +{ + return run_cpucg_nested_weight_test(root, true); +} + +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 1000 + * A/C cpu.weight = 1000 + * A/C/D cpu.weight = 5000 + * A/C/E cpu.weight = 5000 + * + * A separate process is then created for each leaf, which nproc / 4 threads + * that burns a CPU for a few seconds. + * + * Once all of those processes have exited, we verify that each of the leaf + * cgroups have roughly the same usage from cpu.stat. + */ +static int +test_cpucg_nested_weight_underprovisioned(const char *root) +{ + return run_cpucg_nested_weight_test(root, false); +} + +/* + * This test creates a cgroup with some maximum value within a period, and + * verifies that a process in the cgroup is not overscheduled. + */ +static int test_cpucg_max(const char *root) +{ + int ret = KSFT_FAIL; + long usage_usec, user_usec; + long usage_seconds = 1; + long expected_usage_usec = usage_seconds * USEC_PER_SEC; + char *cpucg; + + cpucg = cg_name(root, "cpucg_test"); + if (!cpucg) + goto cleanup; + + if (cg_create(cpucg)) + goto cleanup; + + if (cg_write(cpucg, "cpu.max", "1000")) + goto cleanup; + + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + if (cg_run(cpucg, hog_cpus_timed, (void *)¶m)) + goto cleanup; + + usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + if (user_usec <= 0) + goto cleanup; + + if (user_usec >= expected_usage_usec) + goto cleanup; + + if (values_close(usage_usec, expected_usage_usec, 95)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(cpucg); + free(cpucg); + + return ret; +} + +/* + * This test verifies that a process inside of a nested cgroup whose parent + * group has a cpu.max value set, is properly throttled. + */ +static int test_cpucg_max_nested(const char *root) +{ + int ret = KSFT_FAIL; + long usage_usec, user_usec; + long usage_seconds = 1; + long expected_usage_usec = usage_seconds * USEC_PER_SEC; + char *parent, *child; + + parent = cg_name(root, "cpucg_parent"); + child = cg_name(parent, "cpucg_child"); + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_write(parent, "cpu.max", "1000")) + goto cleanup; + + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + if (cg_run(child, hog_cpus_timed, (void *)¶m)) + goto cleanup; + + usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(child, "cpu.stat", "user_usec"); + if (user_usec <= 0) + goto cleanup; + + if (user_usec >= expected_usage_usec) + goto cleanup; + + if (values_close(usage_usec, expected_usage_usec, 95)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(child); + free(child); + cg_destroy(parent); + free(parent); + + return ret; +} + +#define T(x) { x, #x } +struct cpucg_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_cpucg_subtree_control), + T(test_cpucg_stats), + T(test_cpucg_weight_overprovisioned), + T(test_cpucg_weight_underprovisioned), + T(test_cpucg_nested_weight_overprovisioned), + T(test_cpucg_nested_weight_underprovisioned), + T(test_cpucg_max), + T(test_cpucg_max_nested), +}; +#undef T + +int main(int argc, char *argv[]) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "cpu")) + if (cg_write(root, "cgroup.subtree_control", "+cpu")) + ksft_exit_skip("Failed to set cpu controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh index 15d9d5896394..3c9c4554d5f6 100755 --- a/tools/testing/selftests/cgroup/test_stress.sh +++ b/tools/testing/selftests/cgroup/test_stress.sh @@ -1,4 +1,4 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -./with_stress.sh -s subsys -s fork ./test_core +./with_stress.sh -s subsys -s fork ${OUTPUT:-.}/test_core diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 0315955ff0f4..bc1c407651fc 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -412,7 +412,8 @@ TEST(binderfs_stress) ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); ASSERT_EQ(ret, 0) { - TH_LOG("%s - Failed to mount binderfs", strerror(errno)); + TH_LOG("%s - Failed to mount binderfs, check if CONFIG_ANDROID_BINDERFS is enabled in the running kernel", + strerror(errno)); } for (int i = 0; i < ARRAY_SIZE(fds); i++) { diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index dc7ade196798..459741565222 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -25,6 +25,9 @@ ppc*) s390*) ARG1=%r2 ;; +mips*) + ARG1=%r4 +;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 47d84b5cb6ca..d4662c8cf407 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -36,6 +36,10 @@ s390*) GOODREG=%r2 BADREG=%s2 ;; +mips*) + GOODREG=%r4 + BADREG=%r12 +;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/ir/ir_loopback.sh b/tools/testing/selftests/ir/ir_loopback.sh index b90dc9939f45..aff9299c9416 100755 --- a/tools/testing/selftests/ir/ir_loopback.sh +++ b/tools/testing/selftests/ir/ir_loopback.sh @@ -10,7 +10,7 @@ if [ $UID != 0 ]; then fi if ! /sbin/modprobe -q -n rc-loopback; then - echo "ir_loopback: module rc-loopback is not found [SKIP]" + echo "ir_loopback: module rc-loopback is not found in /lib/modules/`uname -r` [SKIP]" exit $ksft_skip fi diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index b8f248018174..33a0dbd26bd3 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -53,6 +53,21 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif +/* + * gcc cpuid.h provides __cpuid_count() since v4.4. + * Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0. + * + * Provide local define for tests needing __cpuid_count() because + * selftests need to work in older environments that do not yet + * have __cpuid_count(). + */ +#ifndef __cpuid_count +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ __volatile__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +#endif + /* define kselftest exit codes */ #define KSFT_PASS 0 #define KSFT_FAIL 1 diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 0d06ffa95d9d..93d77574b255 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm) return success; } -static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) +static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) { struct kvm_pmu_event_filter *f; int size = sizeof(*f) + nevents * sizeof(f->events[0]); @@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) return f; } -static struct kvm_pmu_event_filter *event_filter(uint32_t action) + +static struct kvm_pmu_event_filter * +create_pmu_event_filter(const uint64_t event_list[], + int nevents, uint32_t action) { struct kvm_pmu_event_filter *f; int i; - f = make_pmu_event_filter(ARRAY_SIZE(event_list)); + f = alloc_pmu_event_filter(nevents); f->action = action; - for (i = 0; i < ARRAY_SIZE(event_list); i++) + for (i = 0; i < nevents; i++) f->events[i] = event_list[i]; return f; } +static struct kvm_pmu_event_filter *event_filter(uint32_t action) +{ + return create_pmu_event_filter(event_list, + ARRAY_SIZE(event_list), + action); +} + /* * Remove the first occurrence of 'event' (if any) from the filter's * event list. @@ -271,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm, return run_vm_to_sync(vm); } +static void test_amd_deny_list(struct kvm_vm *vm) +{ + uint64_t event = EVENT(0x1C2, 0); + struct kvm_pmu_event_filter *f; + uint64_t count; + + f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY); + count = test_with_filter(vm, f); + + free(f); + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + static void test_member_deny_list(struct kvm_vm *vm) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); @@ -453,6 +479,9 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); } + if (use_amd_pmu()) + test_amd_deny_list(vm); + test_without_filter(vm); test_member_deny_list(vm); test_member_allow_list(vm); diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index ca40abe9daa8..da9290817866 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -18,10 +18,11 @@ #include "common.h" #ifndef O_PATH -#define O_PATH 010000000 +#define O_PATH 010000000 #endif -TEST(inconsistent_attr) { +TEST(inconsistent_attr) +{ const long page_size = sysconf(_SC_PAGESIZE); char *const buf = malloc(page_size + 1); struct landlock_ruleset_attr *const ruleset_attr = (void *)buf; @@ -34,20 +35,26 @@ TEST(inconsistent_attr) { ASSERT_EQ(EINVAL, errno); ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0)); ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 7, 0)); + ASSERT_EQ(EINVAL, errno); ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0)); /* The size if less than sizeof(struct landlock_attr_enforce). */ ASSERT_EQ(EFAULT, errno); - ASSERT_EQ(-1, landlock_create_ruleset(NULL, - sizeof(struct landlock_ruleset_attr), 0)); + ASSERT_EQ(-1, landlock_create_ruleset( + NULL, sizeof(struct landlock_ruleset_attr), 0)); ASSERT_EQ(EFAULT, errno); ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0)); ASSERT_EQ(E2BIG, errno); - ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, - sizeof(struct landlock_ruleset_attr), 0)); + /* Checks minimal valid attribute size. */ + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 8, 0)); + ASSERT_EQ(ENOMSG, errno); + ASSERT_EQ(-1, landlock_create_ruleset( + ruleset_attr, + sizeof(struct landlock_ruleset_attr), 0)); ASSERT_EQ(ENOMSG, errno); ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0)); ASSERT_EQ(ENOMSG, errno); @@ -63,38 +70,44 @@ TEST(inconsistent_attr) { free(buf); } -TEST(abi_version) { +TEST(abi_version) +{ const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; - ASSERT_EQ(1, landlock_create_ruleset(NULL, 0, - LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(2, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, - LANDLOCK_CREATE_RULESET_VERSION)); + LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(EINVAL, errno); ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), - LANDLOCK_CREATE_RULESET_VERSION)); + LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(EINVAL, errno); - ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), - LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(-1, + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(EINVAL, errno); ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, - LANDLOCK_CREATE_RULESET_VERSION | 1 << 31)); + LANDLOCK_CREATE_RULESET_VERSION | + 1 << 31)); ASSERT_EQ(EINVAL, errno); } -TEST(inval_create_ruleset_flags) { +/* Tests ordering of syscall argument checks. */ +TEST(create_ruleset_checks_ordering) +{ const int last_flag = LANDLOCK_CREATE_RULESET_VERSION; const int invalid_flag = last_flag << 1; + int ruleset_fd; const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; + /* Checks priority for invalid flags. */ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag)); ASSERT_EQ(EINVAL, errno); @@ -102,44 +115,121 @@ TEST(inval_create_ruleset_flags) { ASSERT_EQ(EINVAL, errno); ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), - invalid_flag)); + invalid_flag)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), + invalid_flag)); ASSERT_EQ(EINVAL, errno); - ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), invalid_flag)); + /* Checks too big ruleset_attr size. */ + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, -1, 0)); + ASSERT_EQ(E2BIG, errno); + + /* Checks too small ruleset_attr size. */ + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, 0)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 1, 0)); ASSERT_EQ(EINVAL, errno); + + /* Checks valid call. */ + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); } -TEST(empty_path_beneath_attr) { +/* Tests ordering of syscall argument checks. */ +TEST(add_rule_checks_ordering) +{ const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, }; - const int ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + struct landlock_path_beneath_attr path_beneath_attr = { + .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE, + .parent_fd = -1, + }; + const int ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); - /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */ + /* Checks invalid flags. */ + ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1)); + ASSERT_EQ(EINVAL, errno); + + /* Checks invalid ruleset FD. */ + ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 0)); + ASSERT_EQ(EBADF, errno); + + /* Checks invalid rule type. */ + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, 0, NULL, 0)); + ASSERT_EQ(EINVAL, errno); + + /* Checks invalid rule attr. */ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - NULL, 0)); + NULL, 0)); ASSERT_EQ(EFAULT, errno); + + /* Checks invalid path_beneath.parent_fd. */ + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath_attr, 0)); + ASSERT_EQ(EBADF, errno); + + /* Checks valid call. */ + path_beneath_attr.parent_fd = + open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath_attr.parent_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath_attr, 0)); + ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); ASSERT_EQ(0, close(ruleset_fd)); } -TEST(inval_fd_enforce) { +/* Tests ordering of syscall argument and permission checks. */ +TEST(restrict_self_checks_ordering) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, + }; + struct landlock_path_beneath_attr path_beneath_attr = { + .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE, + .parent_fd = -1, + }; + const int ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + + ASSERT_LE(0, ruleset_fd); + path_beneath_attr.parent_fd = + open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath_attr.parent_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath_attr, 0)); + ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); + + /* Checks unprivileged enforcement without no_new_privs. */ + drop_caps(_metadata); + ASSERT_EQ(-1, landlock_restrict_self(-1, -1)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, landlock_restrict_self(-1, 0)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + /* Checks invalid flags. */ + ASSERT_EQ(-1, landlock_restrict_self(-1, -1)); + ASSERT_EQ(EINVAL, errno); + + /* Checks invalid ruleset FD. */ ASSERT_EQ(-1, landlock_restrict_self(-1, 0)); ASSERT_EQ(EBADF, errno); -} - -TEST(unpriv_enforce_without_no_new_privs) { - int err; - drop_caps(_metadata); - err = landlock_restrict_self(-1, 0); - ASSERT_EQ(EPERM, errno); - ASSERT_EQ(err, -1); + /* Checks valid call. */ + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + ASSERT_EQ(0, close(ruleset_fd)); } TEST(ruleset_fd_io) @@ -151,8 +241,8 @@ TEST(ruleset_fd_io) char buf; drop_caps(_metadata); - ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); ASSERT_EQ(-1, write(ruleset_fd, ".", 1)); @@ -197,14 +287,15 @@ TEST(ruleset_fd_transfer) drop_caps(_metadata); /* Creates a test ruleset with a simple rule. */ - ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + ruleset_fd_tx = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd_tx); - path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW | - O_DIRECTORY | O_CLOEXEC); + path_beneath_attr.parent_fd = + open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, path_beneath_attr.parent_fd); - ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath_attr, 0)); + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath_attr, 0)); ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); cmsg = CMSG_FIRSTHDR(&msg); @@ -215,7 +306,8 @@ TEST(ruleset_fd_transfer) memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx)); /* Sends the ruleset FD over a socketpair and then close it. */ - ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds)); + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, + socket_fds)); ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0)); ASSERT_EQ(0, close(socket_fds[0])); ASSERT_EQ(0, close(ruleset_fd_tx)); @@ -226,7 +318,8 @@ TEST(ruleset_fd_transfer) int ruleset_fd_rx; *(char *)msg.msg_iov->iov_base = '\0'; - ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC)); + ASSERT_EQ(sizeof(data_tx), + recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC)); ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base); ASSERT_EQ(0, close(socket_fds[1])); cmsg = CMSG_FIRSTHDR(&msg); diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 183b7e8e1b95..7ba18eb23783 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -25,6 +25,7 @@ * this to be possible, we must not call abort() but instead exit smoothly * (hence the step print). */ +/* clang-format off */ #define TEST_F_FORK(fixture_name, test_name) \ static void fixture_name##_##test_name##_child( \ struct __test_metadata *_metadata, \ @@ -71,11 +72,12 @@ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ const FIXTURE_VARIANT(fixture_name) \ __attribute__((unused)) *variant) +/* clang-format on */ #ifndef landlock_create_ruleset -static inline int landlock_create_ruleset( - const struct landlock_ruleset_attr *const attr, - const size_t size, const __u32 flags) +static inline int +landlock_create_ruleset(const struct landlock_ruleset_attr *const attr, + const size_t size, const __u32 flags) { return syscall(__NR_landlock_create_ruleset, attr, size, flags); } @@ -83,17 +85,18 @@ static inline int landlock_create_ruleset( #ifndef landlock_add_rule static inline int landlock_add_rule(const int ruleset_fd, - const enum landlock_rule_type rule_type, - const void *const rule_attr, const __u32 flags) + const enum landlock_rule_type rule_type, + const void *const rule_attr, + const __u32 flags) { - return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, - rule_attr, flags); + return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr, + flags); } #endif #ifndef landlock_restrict_self static inline int landlock_restrict_self(const int ruleset_fd, - const __u32 flags) + const __u32 flags) { return syscall(__NR_landlock_restrict_self, ruleset_fd, flags); } @@ -111,69 +114,76 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) }; cap_p = cap_get_proc(); - EXPECT_NE(NULL, cap_p) { + EXPECT_NE(NULL, cap_p) + { TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); } - EXPECT_NE(-1, cap_clear(cap_p)) { + EXPECT_NE(-1, cap_clear(cap_p)) + { TH_LOG("Failed to cap_clear: %s", strerror(errno)); } if (!drop_all) { EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED, - ARRAY_SIZE(caps), caps, CAP_SET)) { + ARRAY_SIZE(caps), caps, CAP_SET)) + { TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); } } - EXPECT_NE(-1, cap_set_proc(cap_p)) { + EXPECT_NE(-1, cap_set_proc(cap_p)) + { TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); } - EXPECT_NE(-1, cap_free(cap_p)) { + EXPECT_NE(-1, cap_free(cap_p)) + { TH_LOG("Failed to cap_free: %s", strerror(errno)); } } /* We cannot put such helpers in a library because of kselftest_harness.h . */ -__attribute__((__unused__)) -static void disable_caps(struct __test_metadata *const _metadata) +__attribute__((__unused__)) static void +disable_caps(struct __test_metadata *const _metadata) { _init_caps(_metadata, false); } -__attribute__((__unused__)) -static void drop_caps(struct __test_metadata *const _metadata) +__attribute__((__unused__)) static void +drop_caps(struct __test_metadata *const _metadata) { _init_caps(_metadata, true); } static void _effective_cap(struct __test_metadata *const _metadata, - const cap_value_t caps, const cap_flag_value_t value) + const cap_value_t caps, const cap_flag_value_t value) { cap_t cap_p; cap_p = cap_get_proc(); - EXPECT_NE(NULL, cap_p) { + EXPECT_NE(NULL, cap_p) + { TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); } - EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) { + EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) + { TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); } - EXPECT_NE(-1, cap_set_proc(cap_p)) { + EXPECT_NE(-1, cap_set_proc(cap_p)) + { TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); } - EXPECT_NE(-1, cap_free(cap_p)) { + EXPECT_NE(-1, cap_free(cap_p)) + { TH_LOG("Failed to cap_free: %s", strerror(errno)); } } -__attribute__((__unused__)) -static void set_cap(struct __test_metadata *const _metadata, - const cap_value_t caps) +__attribute__((__unused__)) static void +set_cap(struct __test_metadata *const _metadata, const cap_value_t caps) { _effective_cap(_metadata, caps, CAP_SET); } -__attribute__((__unused__)) -static void clear_cap(struct __test_metadata *const _metadata, - const cap_value_t caps) +__attribute__((__unused__)) static void +clear_cap(struct __test_metadata *const _metadata, const cap_value_t caps) { _effective_cap(_metadata, caps, CAP_CLEAR); } diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 10c9a1e4ebd9..21a2ce8fa739 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -22,8 +22,21 @@ #include "common.h" -#define TMP_DIR "tmp" -#define BINARY_PATH "./true" +#ifndef renameat2 +int renameat2(int olddirfd, const char *oldpath, int newdirfd, + const char *newpath, unsigned int flags) +{ + return syscall(__NR_renameat2, olddirfd, oldpath, newdirfd, newpath, + flags); +} +#endif + +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) +#endif + +#define TMP_DIR "tmp" +#define BINARY_PATH "./true" /* Paths (sibling number and depth) */ static const char dir_s1d1[] = TMP_DIR "/s1d1"; @@ -75,7 +88,7 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; */ static void mkdir_parents(struct __test_metadata *const _metadata, - const char *const path) + const char *const path) { char *walker; const char *parent; @@ -90,9 +103,10 @@ static void mkdir_parents(struct __test_metadata *const _metadata, continue; walker[i] = '\0'; err = mkdir(parent, 0700); - ASSERT_FALSE(err && errno != EEXIST) { - TH_LOG("Failed to create directory \"%s\": %s", - parent, strerror(errno)); + ASSERT_FALSE(err && errno != EEXIST) + { + TH_LOG("Failed to create directory \"%s\": %s", parent, + strerror(errno)); } walker[i] = '/'; } @@ -100,22 +114,24 @@ static void mkdir_parents(struct __test_metadata *const _metadata, } static void create_directory(struct __test_metadata *const _metadata, - const char *const path) + const char *const path) { mkdir_parents(_metadata, path); - ASSERT_EQ(0, mkdir(path, 0700)) { + ASSERT_EQ(0, mkdir(path, 0700)) + { TH_LOG("Failed to create directory \"%s\": %s", path, - strerror(errno)); + strerror(errno)); } } static void create_file(struct __test_metadata *const _metadata, - const char *const path) + const char *const path) { mkdir_parents(_metadata, path); - ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) { + ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) + { TH_LOG("Failed to create file \"%s\": %s", path, - strerror(errno)); + strerror(errno)); } } @@ -130,7 +146,7 @@ static int remove_path(const char *const path) goto out; } if (unlink(path) && rmdir(path)) { - if (errno != ENOENT) + if (errno != ENOENT && errno != ENOTDIR) err = errno; goto out; } @@ -221,8 +237,9 @@ static void remove_layout1(struct __test_metadata *const _metadata) EXPECT_EQ(0, remove_path(dir_s3d2)); } -FIXTURE(layout1) { -}; +/* clang-format off */ +FIXTURE(layout1) {}; +/* clang-format on */ FIXTURE_SETUP(layout1) { @@ -242,7 +259,8 @@ FIXTURE_TEARDOWN(layout1) * This helper enables to use the ASSERT_* macros and print the line number * pointing to the test caller. */ -static int test_open_rel(const int dirfd, const char *const path, const int flags) +static int test_open_rel(const int dirfd, const char *const path, + const int flags) { int fd; @@ -291,23 +309,23 @@ TEST_F_FORK(layout1, inval) { struct landlock_path_beneath_attr path_beneath = { .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, .parent_fd = -1, }; struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }; int ruleset_fd; - path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | - O_CLOEXEC); + path_beneath.parent_fd = + open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, path_beneath.parent_fd); ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, ruleset_fd); ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */ ASSERT_EQ(EBADF, errno); ASSERT_EQ(0, close(ruleset_fd)); @@ -315,55 +333,55 @@ TEST_F_FORK(layout1, inval) ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, ruleset_fd); ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); /* Returns EBADFD because ruleset_fd is not a valid ruleset. */ ASSERT_EQ(EBADFD, errno); ASSERT_EQ(0, close(ruleset_fd)); /* Gets a real ruleset. */ - ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); ASSERT_EQ(0, close(path_beneath.parent_fd)); /* Tests without O_PATH. */ path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, path_beneath.parent_fd); ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); ASSERT_EQ(0, close(path_beneath.parent_fd)); /* Tests with a ruleset FD. */ path_beneath.parent_fd = ruleset_fd; ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); ASSERT_EQ(EBADFD, errno); /* Checks unhandled allowed_access. */ - path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | - O_CLOEXEC); + path_beneath.parent_fd = + open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC); ASSERT_LE(0, path_beneath.parent_fd); /* Test with legitimate values. */ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE; ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); ASSERT_EQ(EINVAL, errno); path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE; /* Test with unknown (64-bits) value. */ path_beneath.allowed_access |= (1ULL << 60); ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); ASSERT_EQ(EINVAL, errno); path_beneath.allowed_access &= ~(1ULL << 60); /* Test with no access. */ path_beneath.allowed_access = 0; ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); ASSERT_EQ(ENOMSG, errno); path_beneath.allowed_access &= ~(1ULL << 60); @@ -376,12 +394,14 @@ TEST_F_FORK(layout1, inval) ASSERT_EQ(0, close(ruleset_fd)); } +/* clang-format off */ + #define ACCESS_FILE ( \ LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ LANDLOCK_ACCESS_FS_READ_FILE) -#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM +#define ACCESS_LAST LANDLOCK_ACCESS_FS_REFER #define ACCESS_ALL ( \ ACCESS_FILE | \ @@ -394,55 +414,90 @@ TEST_F_FORK(layout1, inval) LANDLOCK_ACCESS_FS_MAKE_SOCK | \ LANDLOCK_ACCESS_FS_MAKE_FIFO | \ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \ + LANDLOCK_ACCESS_FS_MAKE_SYM | \ ACCESS_LAST) -TEST_F_FORK(layout1, file_access_rights) +/* clang-format on */ + +TEST_F_FORK(layout1, file_and_dir_access_rights) { __u64 access; int err; - struct landlock_path_beneath_attr path_beneath = {}; + struct landlock_path_beneath_attr path_beneath_file = {}, + path_beneath_dir = {}; struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = ACCESS_ALL, }; - const int ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + const int ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); /* Tests access rights for files. */ - path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC); - ASSERT_LE(0, path_beneath.parent_fd); + path_beneath_file.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC); + ASSERT_LE(0, path_beneath_file.parent_fd); + + /* Tests access rights for directories. */ + path_beneath_dir.parent_fd = + open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath_dir.parent_fd); + for (access = 1; access <= ACCESS_LAST; access <<= 1) { - path_beneath.allowed_access = access; + path_beneath_dir.allowed_access = access; + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, + LANDLOCK_RULE_PATH_BENEATH, + &path_beneath_dir, 0)); + + path_beneath_file.allowed_access = access; err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0); - if ((access | ACCESS_FILE) == ACCESS_FILE) { + &path_beneath_file, 0); + if (access & ACCESS_FILE) { ASSERT_EQ(0, err); } else { ASSERT_EQ(-1, err); ASSERT_EQ(EINVAL, errno); } } - ASSERT_EQ(0, close(path_beneath.parent_fd)); + ASSERT_EQ(0, close(path_beneath_file.parent_fd)); + ASSERT_EQ(0, close(path_beneath_dir.parent_fd)); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, unknown_access_rights) +{ + __u64 access_mask; + + for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST; + access_mask >>= 1) { + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = access_mask, + }; + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0)); + ASSERT_EQ(EINVAL, errno); + } } static void add_path_beneath(struct __test_metadata *const _metadata, - const int ruleset_fd, const __u64 allowed_access, - const char *const path) + const int ruleset_fd, const __u64 allowed_access, + const char *const path) { struct landlock_path_beneath_attr path_beneath = { .allowed_access = allowed_access, }; path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC); - ASSERT_LE(0, path_beneath.parent_fd) { + ASSERT_LE(0, path_beneath.parent_fd) + { TH_LOG("Failed to open directory \"%s\": %s", path, - strerror(errno)); + strerror(errno)); } ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)) { + &path_beneath, 0)) + { TH_LOG("Failed to update the ruleset with \"%s\": %s", path, - strerror(errno)); + strerror(errno)); } ASSERT_EQ(0, close(path_beneath.parent_fd)); } @@ -452,6 +507,8 @@ struct rule { __u64 access; }; +/* clang-format off */ + #define ACCESS_RO ( \ LANDLOCK_ACCESS_FS_READ_FILE | \ LANDLOCK_ACCESS_FS_READ_DIR) @@ -460,39 +517,46 @@ struct rule { ACCESS_RO | \ LANDLOCK_ACCESS_FS_WRITE_FILE) +/* clang-format on */ + static int create_ruleset(struct __test_metadata *const _metadata, - const __u64 handled_access_fs, const struct rule rules[]) + const __u64 handled_access_fs, + const struct rule rules[]) { int ruleset_fd, i; struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = handled_access_fs, }; - ASSERT_NE(NULL, rules) { + ASSERT_NE(NULL, rules) + { TH_LOG("No rule list"); } - ASSERT_NE(NULL, rules[0].path) { + ASSERT_NE(NULL, rules[0].path) + { TH_LOG("Empty rule list"); } - ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); - ASSERT_LE(0, ruleset_fd) { + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd) + { TH_LOG("Failed to create a ruleset: %s", strerror(errno)); } for (i = 0; rules[i].path; i++) { add_path_beneath(_metadata, ruleset_fd, rules[i].access, - rules[i].path); + rules[i].path); } return ruleset_fd; } static void enforce_ruleset(struct __test_metadata *const _metadata, - const int ruleset_fd) + const int ruleset_fd) { ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); - ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) { + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) + { TH_LOG("Failed to enforce ruleset: %s", strerror(errno)); } } @@ -503,13 +567,14 @@ TEST_F_FORK(layout1, proc_nsfs) { .path = "/dev/null", .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; struct landlock_path_beneath_attr path_beneath; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access | - LANDLOCK_ACCESS_FS_READ_DIR, rules); + const int ruleset_fd = create_ruleset( + _metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR, + rules); ASSERT_LE(0, ruleset_fd); ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY)); @@ -536,22 +601,23 @@ TEST_F_FORK(layout1, proc_nsfs) * references to a ruleset. */ path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC); ASSERT_LE(0, path_beneath.parent_fd); ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, - &path_beneath, 0)); + &path_beneath, 0)); ASSERT_EQ(EBADFD, errno); ASSERT_EQ(0, close(path_beneath.parent_fd)); } -TEST_F_FORK(layout1, unpriv) { +TEST_F_FORK(layout1, unpriv) +{ const struct rule rules[] = { { .path = dir_s1d2, .access = ACCESS_RO, }, - {} + {}, }; int ruleset_fd; @@ -577,9 +643,9 @@ TEST_F_FORK(layout1, effective_access) { .path = file1_s2d2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); char buf; @@ -589,17 +655,23 @@ TEST_F_FORK(layout1, effective_access) enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); - /* Tests on a directory. */ + /* Tests on a directory (with or without O_PATH). */ ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); + ASSERT_EQ(0, test_open("/", O_RDONLY | O_PATH)); ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_PATH)); ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY | O_PATH)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); - /* Tests on a file. */ + /* Tests on a file (with or without O_PATH). */ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_PATH)); + ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); /* Checks effective read and write actions. */ @@ -626,7 +698,7 @@ TEST_F_FORK(layout1, unhandled_access) .path = dir_s1d2, .access = ACCESS_RO, }, - {} + {}, }; /* Here, we only handle read accesses, not write accesses. */ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules); @@ -653,14 +725,14 @@ TEST_F_FORK(layout1, ruleset_overlap) { .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, { .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_READ_DIR, + LANDLOCK_ACCESS_FS_READ_DIR, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -687,6 +759,113 @@ TEST_F_FORK(layout1, ruleset_overlap) ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); } +TEST_F_FORK(layout1, layer_rule_unions) +{ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */ + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + const struct rule layer2[] = { + /* Doesn't change anything from layer1. */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + const struct rule layer3[] = { + /* Only allows write (but not read) to dir_s1d3. */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks s1d1 hierarchy with layer1. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d2 hierarchy with layer1. */ + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d3 hierarchy with layer1. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY)); + /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Doesn't change anything from layer1. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks s1d1 hierarchy with layer2. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d2 hierarchy with layer2. */ + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d3 hierarchy with layer2. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY)); + /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Only allows write (but not read) to dir_s1d3. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks s1d1 hierarchy with layer3. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d2 hierarchy with layer3. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d3 hierarchy with layer3. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY)); + /* dir_s1d3 should now deny READ_FILE and WRITE_FILE (O_RDWR). */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); +} + TEST_F_FORK(layout1, non_overlapping_accesses) { const struct rule layer1[] = { @@ -694,22 +873,22 @@ TEST_F_FORK(layout1, non_overlapping_accesses) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_MAKE_REG, }, - {} + {}, }; const struct rule layer2[] = { { .path = dir_s1d3, .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, }, - {} + {}, }; int ruleset_fd; ASSERT_EQ(0, unlink(file1_s1d1)); ASSERT_EQ(0, unlink(file1_s1d2)); - ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, - layer1); + ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, layer1); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -720,7 +899,7 @@ TEST_F_FORK(layout1, non_overlapping_accesses) ASSERT_EQ(0, unlink(file1_s1d2)); ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE, - layer2); + layer2); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -758,7 +937,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) .path = file1_s1d3, .access = LANDLOCK_ACCESS_FS_READ_FILE, }, - {} + {}, }; /* First rule with write restrictions. */ const struct rule layer2_read_write[] = { @@ -766,14 +945,14 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) { .path = dir_s1d3, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, /* ...but also denies read access via its grandparent directory. */ { .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; const struct rule layer3_read[] = { /* Allows read access via its great-grandparent directory. */ @@ -781,7 +960,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) .path = dir_s1d1, .access = LANDLOCK_ACCESS_FS_READ_FILE, }, - {} + {}, }; const struct rule layer4_read_write[] = { /* @@ -792,7 +971,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_READ_FILE, }, - {} + {}, }; const struct rule layer5_read[] = { /* @@ -803,7 +982,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_READ_FILE, }, - {} + {}, }; const struct rule layer6_execute[] = { /* @@ -814,7 +993,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) .path = dir_s2d1, .access = LANDLOCK_ACCESS_FS_EXECUTE, }, - {} + {}, }; const struct rule layer7_read_write[] = { /* @@ -825,12 +1004,12 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; int ruleset_fd; ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, - layer1_read); + layer1_read); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -840,8 +1019,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); - ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write); + ruleset_fd = create_ruleset(_metadata, + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + layer2_read_write); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -852,7 +1033,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, - layer3_read); + layer3_read); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -863,8 +1044,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); /* This time, denies write access for the file hierarchy. */ - ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write); + ruleset_fd = create_ruleset(_metadata, + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + layer4_read_write); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -879,7 +1062,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, - layer5_read); + layer5_read); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -891,7 +1074,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE, - layer6_execute); + layer6_execute); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -902,8 +1085,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses) ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); - ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write); + ruleset_fd = create_ruleset(_metadata, + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + layer7_read_write); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -921,9 +1106,9 @@ TEST_F_FORK(layout1, inherit_subset) { .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_READ_DIR, + LANDLOCK_ACCESS_FS_READ_DIR, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -949,7 +1134,7 @@ TEST_F_FORK(layout1, inherit_subset) * ANDed with the previous ones. */ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE, - dir_s1d2); + dir_s1d2); /* * According to ruleset_fd, dir_s1d2 should now have the * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE @@ -1004,7 +1189,7 @@ TEST_F_FORK(layout1, inherit_subset) * that there was no rule tied to it before. */ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE, - dir_s1d3); + dir_s1d3); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -1039,7 +1224,7 @@ TEST_F_FORK(layout1, inherit_superset) .path = dir_s1d3, .access = ACCESS_RO, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -1054,8 +1239,10 @@ TEST_F_FORK(layout1, inherit_superset) ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */ - add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2); + add_path_beneath(_metadata, ruleset_fd, + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, + dir_s1d2); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -1075,12 +1262,12 @@ TEST_F_FORK(layout1, max_layers) .path = dir_s1d2, .access = ACCESS_RO, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ASSERT_LE(0, ruleset_fd); - for (i = 0; i < 64; i++) + for (i = 0; i < 16; i++) enforce_ruleset(_metadata, ruleset_fd); for (i = 0; i < 2; i++) { @@ -1097,15 +1284,15 @@ TEST_F_FORK(layout1, empty_or_same_ruleset) int ruleset_fd; /* Tests empty handled_access_fs. */ - ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(-1, ruleset_fd); ASSERT_EQ(ENOMSG, errno); /* Enforces policy which deny read access to all files. */ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE; - ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); @@ -1113,8 +1300,8 @@ TEST_F_FORK(layout1, empty_or_same_ruleset) /* Nests a policy which deny read access to all directories. */ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR; - ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); @@ -1137,7 +1324,7 @@ TEST_F_FORK(layout1, rule_on_mountpoint) .path = dir_s3d2, .access = ACCESS_RO, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -1166,7 +1353,7 @@ TEST_F_FORK(layout1, rule_over_mountpoint) .path = dir_s3d1, .access = ACCESS_RO, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -1194,7 +1381,7 @@ TEST_F_FORK(layout1, rule_over_root_allow_then_deny) .path = "/", .access = ACCESS_RO, }, - {} + {}, }; int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -1224,7 +1411,7 @@ TEST_F_FORK(layout1, rule_over_root_deny) .path = "/", .access = LANDLOCK_ACCESS_FS_READ_FILE, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -1244,12 +1431,13 @@ TEST_F_FORK(layout1, rule_inside_mount_ns) .path = "s3d3", .access = ACCESS_RO, }, - {} + {}, }; int ruleset_fd; set_cap(_metadata, CAP_SYS_ADMIN); - ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) { + ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3)) + { TH_LOG("Failed to pivot root: %s", strerror(errno)); }; ASSERT_EQ(0, chdir("/")); @@ -1271,7 +1459,7 @@ TEST_F_FORK(layout1, mount_and_pivot) .path = dir_s3d2, .access = ACCESS_RO, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -1282,7 +1470,7 @@ TEST_F_FORK(layout1, mount_and_pivot) set_cap(_metadata, CAP_SYS_ADMIN); ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL)); ASSERT_EQ(EPERM, errno); - ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)); + ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3)); ASSERT_EQ(EPERM, errno); clear_cap(_metadata, CAP_SYS_ADMIN); } @@ -1294,28 +1482,29 @@ TEST_F_FORK(layout1, move_mount) .path = dir_s3d2, .access = ACCESS_RO, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ASSERT_LE(0, ruleset_fd); set_cap(_metadata, CAP_SYS_ADMIN); - ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, - dir_s1d2, 0)) { + ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, + dir_s1d2, 0)) + { TH_LOG("Failed to move mount: %s", strerror(errno)); } - ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD, - dir_s3d2, 0)); + ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD, + dir_s3d2, 0)); clear_cap(_metadata, CAP_SYS_ADMIN); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); set_cap(_metadata, CAP_SYS_ADMIN); - ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, - dir_s1d2, 0)); + ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, + dir_s1d2, 0)); ASSERT_EQ(EPERM, errno); clear_cap(_metadata, CAP_SYS_ADMIN); } @@ -1335,7 +1524,7 @@ TEST_F_FORK(layout1, release_inodes) .path = dir_s3d3, .access = ACCESS_RO, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); @@ -1362,7 +1551,7 @@ enum relative_access { }; static void test_relative_path(struct __test_metadata *const _metadata, - const enum relative_access rel) + const enum relative_access rel) { /* * Common layer to check that chroot doesn't ignore it (i.e. a chroot @@ -1373,7 +1562,7 @@ static void test_relative_path(struct __test_metadata *const _metadata, .path = TMP_DIR, .access = ACCESS_RO, }, - {} + {}, }; const struct rule layer2_subs[] = { { @@ -1384,7 +1573,7 @@ static void test_relative_path(struct __test_metadata *const _metadata, .path = dir_s2d2, .access = ACCESS_RO, }, - {} + {}, }; int dirfd, ruleset_fd; @@ -1425,14 +1614,16 @@ static void test_relative_path(struct __test_metadata *const _metadata, break; case REL_CHROOT_ONLY: /* Do chroot into dir_s1d2 (relative to dir_s2d2). */ - ASSERT_EQ(0, chroot("../../s1d1/s1d2")) { + ASSERT_EQ(0, chroot("../../s1d1/s1d2")) + { TH_LOG("Failed to chroot: %s", strerror(errno)); } dirfd = AT_FDCWD; break; case REL_CHROOT_CHDIR: /* Do chroot into dir_s1d2. */ - ASSERT_EQ(0, chroot(".")) { + ASSERT_EQ(0, chroot(".")) + { TH_LOG("Failed to chroot: %s", strerror(errno)); } dirfd = AT_FDCWD; @@ -1440,7 +1631,7 @@ static void test_relative_path(struct __test_metadata *const _metadata, } ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES, - test_open_rel(dirfd, "..", O_RDONLY)); + test_open_rel(dirfd, "..", O_RDONLY)); ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY)); if (rel == REL_CHROOT_ONLY) { @@ -1462,11 +1653,13 @@ static void test_relative_path(struct __test_metadata *const _metadata, if (rel != REL_CHROOT_CHDIR) { ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY)); ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY)); - ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", + O_RDONLY)); ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY)); ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY)); - ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", + O_RDONLY)); } if (rel == REL_OPEN) @@ -1495,40 +1688,42 @@ TEST_F_FORK(layout1, relative_chroot_chdir) } static void copy_binary(struct __test_metadata *const _metadata, - const char *const dst_path) + const char *const dst_path) { int dst_fd, src_fd; struct stat statbuf; dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC); - ASSERT_LE(0, dst_fd) { - TH_LOG("Failed to open \"%s\": %s", dst_path, - strerror(errno)); + ASSERT_LE(0, dst_fd) + { + TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno)); } src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC); - ASSERT_LE(0, src_fd) { + ASSERT_LE(0, src_fd) + { TH_LOG("Failed to open \"" BINARY_PATH "\": %s", - strerror(errno)); + strerror(errno)); } ASSERT_EQ(0, fstat(src_fd, &statbuf)); - ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0, - statbuf.st_size)); + ASSERT_EQ(statbuf.st_size, + sendfile(dst_fd, src_fd, 0, statbuf.st_size)); ASSERT_EQ(0, close(src_fd)); ASSERT_EQ(0, close(dst_fd)); } -static void test_execute(struct __test_metadata *const _metadata, - const int err, const char *const path) +static void test_execute(struct __test_metadata *const _metadata, const int err, + const char *const path) { int status; - char *const argv[] = {(char *)path, NULL}; + char *const argv[] = { (char *)path, NULL }; const pid_t child = fork(); ASSERT_LE(0, child); if (child == 0) { - ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) { + ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) + { TH_LOG("Failed to execute \"%s\": %s", path, - strerror(errno)); + strerror(errno)); }; ASSERT_EQ(err, errno); _exit(_metadata->passed ? 2 : 1); @@ -1536,9 +1731,10 @@ static void test_execute(struct __test_metadata *const _metadata, } ASSERT_EQ(child, waitpid(child, &status, 0)); ASSERT_EQ(1, WIFEXITED(status)); - ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) { + ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) + { TH_LOG("Unexpected return code for \"%s\": %s", path, - strerror(errno)); + strerror(errno)); }; } @@ -1549,10 +1745,10 @@ TEST_F_FORK(layout1, execute) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_EXECUTE, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); copy_binary(_metadata, file1_s1d1); @@ -1577,15 +1773,21 @@ TEST_F_FORK(layout1, execute) TEST_F_FORK(layout1, link) { - const struct rule rules[] = { + const struct rule layer1[] = { { .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_MAKE_REG, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const struct rule layer2[] = { + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {}, + }; + int ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1); ASSERT_LE(0, ruleset_fd); @@ -1598,14 +1800,30 @@ TEST_F_FORK(layout1, link) ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); ASSERT_EQ(EACCES, errno); + /* Denies linking because of reparenting. */ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2)); ASSERT_EQ(EXDEV, errno); ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3)); ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); ASSERT_EQ(0, link(file2_s1d2, file1_s1d2)); ASSERT_EQ(0, link(file2_s1d3, file1_s1d3)); + + /* Prepares for next unlinks. */ + ASSERT_EQ(0, unlink(file2_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that linkind doesn't require the ability to delete a file. */ + ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); + ASSERT_EQ(0, link(file1_s1d3, file2_s1d3)); } TEST_F_FORK(layout1, rename_file) @@ -1619,14 +1837,13 @@ TEST_F_FORK(layout1, rename_file) .path = dir_s2d2, .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); - ASSERT_EQ(0, unlink(file1_s1d1)); ASSERT_EQ(0, unlink(file1_s1d2)); enforce_ruleset(_metadata, ruleset_fd); @@ -1662,9 +1879,15 @@ TEST_F_FORK(layout1, rename_file) ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1, RENAME_EXCHANGE)); ASSERT_EQ(EACCES, errno); + /* Checks that file1_s2d1 cannot be removed (instead of ENOTDIR). */ + ASSERT_EQ(-1, rename(dir_s2d2, file1_s2d1)); + ASSERT_EQ(EACCES, errno); ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2, RENAME_EXCHANGE)); ASSERT_EQ(EACCES, errno); + /* Checks that file1_s1d1 cannot be removed (instead of EISDIR). */ + ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2)); + ASSERT_EQ(EACCES, errno); /* Renames files with different parents. */ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2)); @@ -1675,14 +1898,14 @@ TEST_F_FORK(layout1, rename_file) /* Exchanges and renames files with same parent. */ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3, - RENAME_EXCHANGE)); + RENAME_EXCHANGE)); ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3)); /* Exchanges files and directories with same parent, twice. */ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, - RENAME_EXCHANGE)); + RENAME_EXCHANGE)); ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, - RENAME_EXCHANGE)); + RENAME_EXCHANGE)); } TEST_F_FORK(layout1, rename_dir) @@ -1696,10 +1919,10 @@ TEST_F_FORK(layout1, rename_dir) .path = dir_s2d1, .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); @@ -1727,22 +1950,743 @@ TEST_F_FORK(layout1, rename_dir) ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1, RENAME_EXCHANGE)); ASSERT_EQ(EACCES, errno); + /* Checks that dir_s1d2 cannot be removed (instead of ENOTDIR). */ + ASSERT_EQ(-1, rename(dir_s1d2, file1_s1d1)); + ASSERT_EQ(EACCES, errno); ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2, RENAME_EXCHANGE)); ASSERT_EQ(EACCES, errno); + /* Checks that dir_s1d2 cannot be removed (instead of EISDIR). */ + ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2)); + ASSERT_EQ(EACCES, errno); /* * Exchanges and renames directory to the same parent, which allows * directory removal. */ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2, - RENAME_EXCHANGE)); + RENAME_EXCHANGE)); ASSERT_EQ(0, unlink(dir_s1d3)); ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3)); ASSERT_EQ(0, rmdir(dir_s1d3)); } +TEST_F_FORK(layout1, reparent_refer) +{ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + {}, + }; + int ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d1)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d2)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3)); + ASSERT_EQ(EXDEV, errno); + + ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d1)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d2)); + ASSERT_EQ(EXDEV, errno); + /* + * Moving should only be allowed when the source and the destination + * parent directory have REFER. + */ + ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d3)); + ASSERT_EQ(ENOTEMPTY, errno); + ASSERT_EQ(0, unlink(file1_s2d3)); + ASSERT_EQ(0, unlink(file2_s2d3)); + ASSERT_EQ(0, rename(dir_s1d3, dir_s2d3)); +} + +TEST_F_FORK(layout1, reparent_link) +{ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = dir_s2d3, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + {}, + }; + const int ruleset_fd = create_ruleset( + _metadata, + LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + /* Denies linking because of missing MAKE_REG. */ + ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + /* Denies linking because of missing source and destination REFER. */ + ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + /* Denies linking because of missing source REFER. */ + ASSERT_EQ(-1, link(file1_s2d1, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + + /* Denies linking because of missing MAKE_REG. */ + ASSERT_EQ(-1, link(file1_s2d2, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + /* Denies linking because of missing destination REFER. */ + ASSERT_EQ(-1, link(file1_s2d2, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + + /* Allows linking because of REFER and MAKE_REG. */ + ASSERT_EQ(0, link(file1_s2d2, file1_s1d3)); + ASSERT_EQ(0, unlink(file1_s2d2)); + /* Reverse linking denied because of missing MAKE_REG. */ + ASSERT_EQ(-1, link(file1_s1d3, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, unlink(file1_s2d3)); + /* Checks reverse linking. */ + ASSERT_EQ(0, link(file1_s1d3, file1_s2d3)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + /* + * This is OK for a file link, but it should not be allowed for a + * directory rename (because of the superset of access rights. + */ + ASSERT_EQ(0, link(file1_s2d3, file1_s1d3)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + + ASSERT_EQ(0, link(file2_s1d2, file1_s1d2)); + ASSERT_EQ(0, link(file2_s1d3, file1_s1d3)); +} + +TEST_F_FORK(layout1, reparent_rename) +{ + /* Same rules as for reparent_link. */ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = dir_s2d3, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + {}, + }; + const int ruleset_fd = create_ruleset( + _metadata, + LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + /* Denies renaming because of missing MAKE_REG. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s1d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + /* Even denies same file exchange. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file2_s1d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Denies renaming because of missing source and destination REFER. */ + ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + /* + * Denies renaming because of missing MAKE_REG, source and destination + * REFER. + */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s2d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Denies renaming because of missing source REFER. */ + ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + /* Denies renaming because of missing MAKE_REG. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Denies renaming because of missing MAKE_REG. */ + ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + /* Denies renaming because of missing destination REFER*/ + ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + + /* Denies exchange because of one missing MAKE_REG. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, file2_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + /* Allows renaming because of REFER and MAKE_REG. */ + ASSERT_EQ(0, rename(file1_s2d2, file1_s1d3)); + + /* Reverse renaming denied because of missing MAKE_REG. */ + ASSERT_EQ(-1, rename(file1_s1d3, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, unlink(file1_s2d3)); + ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3)); + + /* Tests reverse renaming. */ + ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3)); + ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3)); + + /* + * This is OK for a file rename, but it should not be allowed for a + * directory rename (because of the superset of access rights). + */ + ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3)); + ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3)); + + /* + * Tests superset restrictions applied to directories. Not only the + * dir_s2d3's parent (dir_s2d2) should be taken into account but also + * access rights tied to dir_s2d3. dir_s2d2 is missing one access right + * compared to dir_s1d3/file1_s1d3 (MAKE_REG) but it is provided + * directly by the moved dir_s2d3. + */ + ASSERT_EQ(0, rename(dir_s2d3, file1_s1d3)); + ASSERT_EQ(0, rename(file1_s1d3, dir_s2d3)); + /* + * The first rename is allowed but not the exchange because dir_s1d3's + * parent (dir_s1d2) doesn't have REFER. + */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(file1_s2d3, dir_s1d3)); + ASSERT_EQ(EXDEV, errno); + + ASSERT_EQ(-1, rename(file2_s1d2, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(file2_s1d3, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + + /* Renaming in the same directory is always allowed. */ + ASSERT_EQ(0, rename(file2_s1d2, file1_s1d2)); + ASSERT_EQ(0, rename(file2_s1d3, file1_s1d3)); + + ASSERT_EQ(0, unlink(file1_s1d2)); + /* Denies because of missing source MAKE_REG and destination REFER. */ + ASSERT_EQ(-1, rename(dir_s2d3, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + + ASSERT_EQ(0, unlink(file1_s1d3)); + /* Denies because of missing source MAKE_REG and REFER. */ + ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); +} + +static void +reparent_exdev_layers_enforce1(struct __test_metadata *const _metadata) +{ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + /* Interesting for the layer2 tests. */ + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = dir_s2d3, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + {}, + }; + const int ruleset_fd = create_ruleset( + _metadata, + LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); +} + +static void +reparent_exdev_layers_enforce2(struct __test_metadata *const _metadata) +{ + const struct rule layer2[] = { + { + .path = dir_s2d3, + .access = LANDLOCK_ACCESS_FS_MAKE_DIR, + }, + {}, + }; + /* + * Same checks as before but with a second layer and a new MAKE_DIR + * rule (and no explicit handling of REFER). + */ + const int ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_DIR, layer2); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, reparent_exdev_layers_rename1) +{ + ASSERT_EQ(0, unlink(file1_s2d2)); + ASSERT_EQ(0, unlink(file1_s2d3)); + + reparent_exdev_layers_enforce1(_metadata); + + /* + * Moving the dir_s1d3 directory below dir_s2d2 is allowed by Landlock + * because it doesn't inherit new access rights. + */ + ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2)); + ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3)); + + /* + * Moving the dir_s1d3 directory below dir_s2d3 is allowed, even if it + * gets a new inherited access rights (MAKE_REG), because MAKE_REG is + * already allowed for dir_s1d3. + */ + ASSERT_EQ(0, rename(dir_s1d3, file1_s2d3)); + ASSERT_EQ(0, rename(file1_s2d3, dir_s1d3)); + + /* + * However, moving the file1_s1d3 file below dir_s2d3 is allowed + * because it cannot inherit MAKE_REG right (which is dedicated to + * directories). + */ + ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3)); + + reparent_exdev_layers_enforce2(_metadata); + + /* + * Moving the dir_s1d3 directory below dir_s2d2 is now denied because + * MAKE_DIR is not tied to dir_s2d2. + */ + ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + + /* + * Moving the dir_s1d3 directory below dir_s2d3 is forbidden because it + * would grants MAKE_REG and MAKE_DIR rights to it. + */ + ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3)); + ASSERT_EQ(EXDEV, errno); + + /* + * However, moving the file2_s1d3 file below dir_s2d3 is allowed + * because it cannot inherit MAKE_REG nor MAKE_DIR rights (which are + * dedicated to directories). + */ + ASSERT_EQ(0, rename(file2_s1d3, file1_s2d3)); +} + +TEST_F_FORK(layout1, reparent_exdev_layers_rename2) +{ + reparent_exdev_layers_enforce1(_metadata); + + /* Checks EACCES predominance over EXDEV. */ + ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3)); + ASSERT_EQ(EXDEV, errno); + /* Modify layout! */ + ASSERT_EQ(0, rename(file1_s1d2, file1_s2d3)); + + /* Without REFER source. */ + ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2)); + ASSERT_EQ(EXDEV, errno); + + reparent_exdev_layers_enforce2(_metadata); + + /* Checks EACCES predominance over EXDEV. */ + ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + /* Checks with actual file2_s1d2. */ + ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3)); + ASSERT_EQ(EXDEV, errno); + /* Modify layout! */ + ASSERT_EQ(0, rename(file2_s1d2, file1_s2d3)); + + /* Without REFER source, EACCES wins over EXDEV. */ + ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2)); + ASSERT_EQ(EACCES, errno); +} + +TEST_F_FORK(layout1, reparent_exdev_layers_exchange1) +{ + const char *const dir_file1_s1d2 = file1_s1d2, *const dir_file2_s2d3 = + file2_s2d3; + + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, mkdir(file1_s1d2, 0700)); + ASSERT_EQ(0, unlink(file2_s2d3)); + ASSERT_EQ(0, mkdir(file2_s2d3, 0700)); + + reparent_exdev_layers_enforce1(_metadata); + + /* Error predominance with file exchange: returns EXDEV and EACCES. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* + * Checks with directories which creation could be allowed, but denied + * because of access rights that would be inherited. + */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, + dir_file2_s2d3, RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, + dir_file1_s1d2, RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + + /* Checks with same access rights. */ + ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + + /* Checks with different (child-only) access rights. */ + ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2, + RENAME_EXCHANGE)); + ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); + + /* + * Checks that exchange between file and directory are consistent. + * + * Moving a file (file1_s2d2) to a directory which only grants more + * directory-related access rights is allowed, and at the same time + * moving a directory (dir_file2_s2d3) to another directory which + * grants less access rights is allowed too. + * + * See layout1.reparent_exdev_layers_exchange3 for inverted arguments. + */ + ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3, + RENAME_EXCHANGE)); + /* + * However, moving back the directory is denied because it would get + * more access rights than the current state and because file creation + * is forbidden (in dir_s2d2). + */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + reparent_exdev_layers_enforce2(_metadata); + + /* Error predominance with file exchange: returns EXDEV and EACCES. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Checks with directories which creation is now denied. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, + dir_file2_s2d3, RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, + dir_file1_s1d2, RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Checks with different (child-only) access rights. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); + /* Denied because of MAKE_DIR. */ + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Checks with different (child-only) access rights. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2, + RENAME_EXCHANGE)); + /* Denied because of MAKE_DIR. */ + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* See layout1.reparent_exdev_layers_exchange2 for complement. */ +} + +TEST_F_FORK(layout1, reparent_exdev_layers_exchange2) +{ + const char *const dir_file2_s2d3 = file2_s2d3; + + ASSERT_EQ(0, unlink(file2_s2d3)); + ASSERT_EQ(0, mkdir(file2_s2d3, 0700)); + + reparent_exdev_layers_enforce1(_metadata); + reparent_exdev_layers_enforce2(_metadata); + + /* Checks that exchange between file and directory are consistent. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); +} + +TEST_F_FORK(layout1, reparent_exdev_layers_exchange3) +{ + const char *const dir_file2_s2d3 = file2_s2d3; + + ASSERT_EQ(0, unlink(file2_s2d3)); + ASSERT_EQ(0, mkdir(file2_s2d3, 0700)); + + reparent_exdev_layers_enforce1(_metadata); + + /* + * Checks that exchange between file and directory are consistent, + * including with inverted arguments (see + * layout1.reparent_exdev_layers_exchange1). + */ + ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2, + RENAME_EXCHANGE)); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); +} + +TEST_F_FORK(layout1, reparent_remove) +{ + const struct rule layer1[] = { + { + .path = dir_s1d1, + .access = LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_REMOVE_DIR, + }, + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + { + .path = dir_s2d1, + .access = LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {}, + }; + const int ruleset_fd = create_ruleset( + _metadata, + LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_REMOVE_DIR | + LANDLOCK_ACCESS_FS_REMOVE_FILE, + layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Access denied because of wrong/swapped remove file/dir. */ + ASSERT_EQ(-1, rename(file1_s1d1, dir_s2d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Access allowed thanks to the matching rights. */ + ASSERT_EQ(-1, rename(file1_s2d1, dir_s1d2)); + ASSERT_EQ(EISDIR, errno); + ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d1)); + ASSERT_EQ(ENOTDIR, errno); + ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1)); + ASSERT_EQ(ENOTDIR, errno); + ASSERT_EQ(0, unlink(file1_s2d1)); + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + ASSERT_EQ(0, rename(dir_s1d3, file1_s2d1)); + + /* Effectively removes a file and a directory by exchanging them. */ + ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); + ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); +} + +TEST_F_FORK(layout1, reparent_dom_superset) +{ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = file1_s1d2, + .access = LANDLOCK_ACCESS_FS_EXECUTE, + }, + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_MAKE_SOCK | + LANDLOCK_ACCESS_FS_EXECUTE, + }, + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_MAKE_SOCK, + }, + { + .path = dir_s2d3, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_MAKE_FIFO, + }, + {}, + }; + int ruleset_fd = create_ruleset(_metadata, + LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_MAKE_SOCK | + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_MAKE_FIFO, + layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d1)); + ASSERT_EQ(EXDEV, errno); + /* + * Moving file1_s1d2 beneath dir_s2d3 would grant it the READ_FILE + * access right. + */ + ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d3)); + ASSERT_EQ(EXDEV, errno); + /* + * Moving file1_s1d2 should be allowed even if dir_s2d2 grants a + * superset of access rights compared to dir_s1d2, because file1_s1d2 + * already has these access rights anyway. + */ + ASSERT_EQ(0, rename(file1_s1d2, file1_s2d2)); + ASSERT_EQ(0, rename(file1_s2d2, file1_s1d2)); + + ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1)); + ASSERT_EQ(EXDEV, errno); + /* + * Moving dir_s1d3 beneath dir_s2d3 would grant it the MAKE_FIFO access + * right. + */ + ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3)); + ASSERT_EQ(EXDEV, errno); + /* + * Moving dir_s1d3 should be allowed even if dir_s2d2 grants a superset + * of access rights compared to dir_s1d2, because dir_s1d3 already has + * these access rights anyway. + */ + ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2)); + ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3)); + + /* + * Moving file1_s2d3 beneath dir_s1d2 is allowed, but moving it back + * will be denied because the new inherited access rights from dir_s1d2 + * will be less than the destination (original) dir_s2d3. This is a + * sinkhole scenario where we cannot move back files or directories. + */ + ASSERT_EQ(0, rename(file1_s2d3, file2_s1d2)); + ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(0, unlink(file2_s1d2)); + ASSERT_EQ(0, unlink(file2_s2d3)); + /* + * Checks similar directory one-way move: dir_s2d3 loses EXECUTE and + * MAKE_SOCK which were inherited from dir_s1d3. + */ + ASSERT_EQ(0, rename(dir_s2d3, file2_s1d2)); + ASSERT_EQ(-1, rename(file2_s1d2, dir_s2d3)); + ASSERT_EQ(EXDEV, errno); +} + TEST_F_FORK(layout1, remove_dir) { const struct rule rules[] = { @@ -1750,10 +2694,10 @@ TEST_F_FORK(layout1, remove_dir) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); @@ -1787,10 +2731,10 @@ TEST_F_FORK(layout1, remove_file) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); @@ -1805,14 +2749,15 @@ TEST_F_FORK(layout1, remove_file) } static void test_make_file(struct __test_metadata *const _metadata, - const __u64 access, const mode_t mode, const dev_t dev) + const __u64 access, const mode_t mode, + const dev_t dev) { const struct rule rules[] = { { .path = dir_s1d2, .access = access, }, - {} + {}, }; const int ruleset_fd = create_ruleset(_metadata, access, rules); @@ -1820,9 +2765,10 @@ static void test_make_file(struct __test_metadata *const _metadata, ASSERT_EQ(0, unlink(file1_s1d1)); ASSERT_EQ(0, unlink(file2_s1d1)); - ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) { - TH_LOG("Failed to make file \"%s\": %s", - file2_s1d1, strerror(errno)); + ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) + { + TH_LOG("Failed to make file \"%s\": %s", file2_s1d1, + strerror(errno)); }; ASSERT_EQ(0, unlink(file1_s1d2)); @@ -1841,9 +2787,10 @@ static void test_make_file(struct __test_metadata *const _metadata, ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); ASSERT_EQ(EACCES, errno); - ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) { - TH_LOG("Failed to make file \"%s\": %s", - file1_s1d2, strerror(errno)); + ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) + { + TH_LOG("Failed to make file \"%s\": %s", file1_s1d2, + strerror(errno)); }; ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); ASSERT_EQ(0, unlink(file2_s1d2)); @@ -1860,7 +2807,7 @@ TEST_F_FORK(layout1, make_char) /* Creates a /dev/null device. */ set_cap(_metadata, CAP_MKNOD); test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR, - makedev(1, 3)); + makedev(1, 3)); } TEST_F_FORK(layout1, make_block) @@ -1868,7 +2815,7 @@ TEST_F_FORK(layout1, make_block) /* Creates a /dev/loop0 device. */ set_cap(_metadata, CAP_MKNOD); test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK, - makedev(7, 0)); + makedev(7, 0)); } TEST_F_FORK(layout1, make_reg_1) @@ -1898,10 +2845,10 @@ TEST_F_FORK(layout1, make_sym) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_MAKE_SYM, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); @@ -1943,10 +2890,10 @@ TEST_F_FORK(layout1, make_dir) .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_MAKE_DIR, }, - {} + {}, }; - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); @@ -1965,12 +2912,12 @@ TEST_F_FORK(layout1, make_dir) } static int open_proc_fd(struct __test_metadata *const _metadata, const int fd, - const int open_flags) + const int open_flags) { static const char path_template[] = "/proc/self/fd/%d"; char procfd_path[sizeof(path_template) + 10]; - const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path), - path_template, fd); + const int procfd_path_size = + snprintf(procfd_path, sizeof(procfd_path), path_template, fd); ASSERT_LT(procfd_path_size, sizeof(procfd_path)); return open(procfd_path, open_flags); @@ -1983,12 +2930,13 @@ TEST_F_FORK(layout1, proc_unlinked_file) .path = file1_s1d2, .access = LANDLOCK_ACCESS_FS_READ_FILE, }, - {} + {}, }; int reg_fd, proc_fd; - const int ruleset_fd = create_ruleset(_metadata, - LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, rules); + const int ruleset_fd = create_ruleset( + _metadata, + LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE, + rules); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); @@ -2005,9 +2953,10 @@ TEST_F_FORK(layout1, proc_unlinked_file) ASSERT_EQ(0, close(proc_fd)); proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC); - ASSERT_EQ(-1, proc_fd) { - TH_LOG("Successfully opened /proc/self/fd/%d: %s", - reg_fd, strerror(errno)); + ASSERT_EQ(-1, proc_fd) + { + TH_LOG("Successfully opened /proc/self/fd/%d: %s", reg_fd, + strerror(errno)); } ASSERT_EQ(EACCES, errno); @@ -2023,13 +2972,13 @@ TEST_F_FORK(layout1, proc_pipe) { .path = dir_s1d2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; /* Limits read and write access to files tied to the filesystem. */ - const int ruleset_fd = create_ruleset(_metadata, rules[0].access, - rules); + const int ruleset_fd = + create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); @@ -2041,7 +2990,8 @@ TEST_F_FORK(layout1, proc_pipe) /* Checks access to pipes through FD. */ ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC)); - ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) { + ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) + { TH_LOG("Failed to write in pipe: %s", strerror(errno)); } ASSERT_EQ(1, read(pipe_fds[0], &buf, 1)); @@ -2050,9 +3000,10 @@ TEST_F_FORK(layout1, proc_pipe) /* Checks write access to pipe through /proc/self/fd . */ proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC); ASSERT_LE(0, proc_fd); - ASSERT_EQ(1, write(proc_fd, ".", 1)) { + ASSERT_EQ(1, write(proc_fd, ".", 1)) + { TH_LOG("Failed to write through /proc/self/fd/%d: %s", - pipe_fds[1], strerror(errno)); + pipe_fds[1], strerror(errno)); } ASSERT_EQ(0, close(proc_fd)); @@ -2060,9 +3011,10 @@ TEST_F_FORK(layout1, proc_pipe) proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC); ASSERT_LE(0, proc_fd); buf = '\0'; - ASSERT_EQ(1, read(proc_fd, &buf, 1)) { + ASSERT_EQ(1, read(proc_fd, &buf, 1)) + { TH_LOG("Failed to read through /proc/self/fd/%d: %s", - pipe_fds[1], strerror(errno)); + pipe_fds[1], strerror(errno)); } ASSERT_EQ(0, close(proc_fd)); @@ -2070,8 +3022,9 @@ TEST_F_FORK(layout1, proc_pipe) ASSERT_EQ(0, close(pipe_fds[1])); } -FIXTURE(layout1_bind) { -}; +/* clang-format off */ +FIXTURE(layout1_bind) {}; +/* clang-format on */ FIXTURE_SETUP(layout1_bind) { @@ -2161,7 +3114,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file) .path = dir_s2d1, .access = ACCESS_RW, }, - {} + {}, }; /* * Sets access rights on the same bind-mounted directories. The result @@ -2177,7 +3130,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file) .path = dir_s2d2, .access = ACCESS_RW, }, - {} + {}, }; /* Only allow read-access to the s1d3 hierarchies. */ const struct rule layer3_source[] = { @@ -2185,7 +3138,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file) .path = dir_s1d3, .access = LANDLOCK_ACCESS_FS_READ_FILE, }, - {} + {}, }; /* Removes all access rights. */ const struct rule layer4_destination[] = { @@ -2193,7 +3146,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file) .path = bind_file1_s1d3, .access = LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; int ruleset_fd; @@ -2282,8 +3235,46 @@ TEST_F_FORK(layout1_bind, same_content_same_file) ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); } -#define LOWER_BASE TMP_DIR "/lower" -#define LOWER_DATA LOWER_BASE "/data" +TEST_F_FORK(layout1_bind, reparent_cross_mount) +{ + const struct rule layer1[] = { + { + /* dir_s2d1 is beneath the dir_s2d2 mount point. */ + .path = dir_s2d1, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + { + .path = bind_dir_s1d3, + .access = LANDLOCK_ACCESS_FS_EXECUTE, + }, + {}, + }; + int ruleset_fd = create_ruleset( + _metadata, + LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_EXECUTE, layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks basic denied move. */ + ASSERT_EQ(-1, rename(file1_s1d1, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + + /* Checks real cross-mount move (Landlock is not involved). */ + ASSERT_EQ(-1, rename(file1_s2d1, file1_s2d2)); + ASSERT_EQ(EXDEV, errno); + + /* Checks move that will give more accesses. */ + ASSERT_EQ(-1, rename(file1_s2d2, bind_file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + + /* Checks legitimate downgrade move. */ + ASSERT_EQ(0, rename(bind_file1_s1d3, file1_s2d2)); +} + +#define LOWER_BASE TMP_DIR "/lower" +#define LOWER_DATA LOWER_BASE "/data" static const char lower_fl1[] = LOWER_DATA "/fl1"; static const char lower_dl1[] = LOWER_DATA "/dl1"; static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2"; @@ -2295,23 +3286,23 @@ static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3"; static const char (*lower_base_files[])[] = { &lower_fl1, &lower_fo1, - NULL + NULL, }; static const char (*lower_base_directories[])[] = { &lower_dl1, &lower_do1, - NULL + NULL, }; static const char (*lower_sub_files[])[] = { &lower_dl1_fl2, &lower_do1_fo2, &lower_do1_fl3, - NULL + NULL, }; -#define UPPER_BASE TMP_DIR "/upper" -#define UPPER_DATA UPPER_BASE "/data" -#define UPPER_WORK UPPER_BASE "/work" +#define UPPER_BASE TMP_DIR "/upper" +#define UPPER_DATA UPPER_BASE "/data" +#define UPPER_WORK UPPER_BASE "/work" static const char upper_fu1[] = UPPER_DATA "/fu1"; static const char upper_du1[] = UPPER_DATA "/du1"; static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2"; @@ -2323,22 +3314,22 @@ static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3"; static const char (*upper_base_files[])[] = { &upper_fu1, &upper_fo1, - NULL + NULL, }; static const char (*upper_base_directories[])[] = { &upper_du1, &upper_do1, - NULL + NULL, }; static const char (*upper_sub_files[])[] = { &upper_du1_fu2, &upper_do1_fo2, &upper_do1_fu3, - NULL + NULL, }; -#define MERGE_BASE TMP_DIR "/merge" -#define MERGE_DATA MERGE_BASE "/data" +#define MERGE_BASE TMP_DIR "/merge" +#define MERGE_DATA MERGE_BASE "/data" static const char merge_fl1[] = MERGE_DATA "/fl1"; static const char merge_dl1[] = MERGE_DATA "/dl1"; static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2"; @@ -2355,21 +3346,17 @@ static const char (*merge_base_files[])[] = { &merge_fl1, &merge_fu1, &merge_fo1, - NULL + NULL, }; static const char (*merge_base_directories[])[] = { &merge_dl1, &merge_du1, &merge_do1, - NULL + NULL, }; static const char (*merge_sub_files[])[] = { - &merge_dl1_fl2, - &merge_du1_fu2, - &merge_do1_fo2, - &merge_do1_fl3, - &merge_do1_fu3, - NULL + &merge_dl1_fl2, &merge_du1_fu2, &merge_do1_fo2, + &merge_do1_fl3, &merge_do1_fu3, NULL, }; /* @@ -2411,8 +3398,9 @@ static const char (*merge_sub_files[])[] = { * └── work */ -FIXTURE(layout2_overlay) { -}; +/* clang-format off */ +FIXTURE(layout2_overlay) {}; +/* clang-format on */ FIXTURE_SETUP(layout2_overlay) { @@ -2444,9 +3432,8 @@ FIXTURE_SETUP(layout2_overlay) set_cap(_metadata, CAP_SYS_ADMIN); set_cap(_metadata, CAP_DAC_OVERRIDE); ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0, - "lowerdir=" LOWER_DATA - ",upperdir=" UPPER_DATA - ",workdir=" UPPER_WORK)); + "lowerdir=" LOWER_DATA ",upperdir=" UPPER_DATA + ",workdir=" UPPER_WORK)); clear_cap(_metadata, CAP_DAC_OVERRIDE); clear_cap(_metadata, CAP_SYS_ADMIN); } @@ -2513,9 +3500,9 @@ TEST_F_FORK(layout2_overlay, no_restriction) ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY)); } -#define for_each_path(path_list, path_entry, i) \ - for (i = 0, path_entry = *path_list[i]; path_list[i]; \ - path_entry = *path_list[++i]) +#define for_each_path(path_list, path_entry, i) \ + for (i = 0, path_entry = *path_list[i]; path_list[i]; \ + path_entry = *path_list[++i]) TEST_F_FORK(layout2_overlay, same_content_different_file) { @@ -2533,7 +3520,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) .path = MERGE_BASE, .access = ACCESS_RW, }, - {} + {}, }; const struct rule layer2_data[] = { { @@ -2548,7 +3535,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) .path = MERGE_DATA, .access = ACCESS_RW, }, - {} + {}, }; /* Sets access right on directories inside both layers. */ const struct rule layer3_subdirs[] = { @@ -2580,7 +3567,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) .path = merge_do1, .access = ACCESS_RW, }, - {} + {}, }; /* Tighten access rights to the files. */ const struct rule layer4_files[] = { @@ -2611,37 +3598,37 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) { .path = merge_dl1_fl2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, { .path = merge_du1_fu2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, { .path = merge_do1_fo2, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, { .path = merge_do1_fl3, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, { .path = merge_do1_fu3, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; const struct rule layer5_merge_only[] = { { .path = MERGE_DATA, .access = LANDLOCK_ACCESS_FS_READ_FILE | - LANDLOCK_ACCESS_FS_WRITE_FILE, + LANDLOCK_ACCESS_FS_WRITE_FILE, }, - {} + {}, }; int ruleset_fd; size_t i; @@ -2659,7 +3646,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); } for_each_path(lower_base_directories, path_entry, i) { - ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + ASSERT_EQ(EACCES, + test_open(path_entry, O_RDONLY | O_DIRECTORY)); } for_each_path(lower_sub_files, path_entry, i) { ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); @@ -2671,7 +3659,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); } for_each_path(upper_base_directories, path_entry, i) { - ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + ASSERT_EQ(EACCES, + test_open(path_entry, O_RDONLY | O_DIRECTORY)); } for_each_path(upper_sub_files, path_entry, i) { ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); @@ -2756,7 +3745,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); } for_each_path(merge_base_directories, path_entry, i) { - ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + ASSERT_EQ(EACCES, + test_open(path_entry, O_RDONLY | O_DIRECTORY)); } for_each_path(merge_sub_files, path_entry, i) { ASSERT_EQ(0, test_open(path_entry, O_RDWR)); @@ -2781,7 +3771,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); } for_each_path(merge_base_directories, path_entry, i) { - ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + ASSERT_EQ(EACCES, + test_open(path_entry, O_RDONLY | O_DIRECTORY)); } for_each_path(merge_sub_files, path_entry, i) { ASSERT_EQ(0, test_open(path_entry, O_RDWR)); diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c index 15fbef9cc849..c28ef98ff3ac 100644 --- a/tools/testing/selftests/landlock/ptrace_test.c +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -26,9 +26,10 @@ static void create_domain(struct __test_metadata *const _metadata) .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK, }; - ruleset_fd = landlock_create_ruleset(&ruleset_attr, - sizeof(ruleset_attr), 0); - EXPECT_LE(0, ruleset_fd) { + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + EXPECT_LE(0, ruleset_fd) + { TH_LOG("Failed to create a ruleset: %s", strerror(errno)); } EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); @@ -43,7 +44,7 @@ static int test_ptrace_read(const pid_t pid) int procenv_path_size, fd; procenv_path_size = snprintf(procenv_path, sizeof(procenv_path), - path_template, pid); + path_template, pid); if (procenv_path_size >= sizeof(procenv_path)) return E2BIG; @@ -59,9 +60,12 @@ static int test_ptrace_read(const pid_t pid) return 0; } -FIXTURE(hierarchy) { }; +/* clang-format off */ +FIXTURE(hierarchy) {}; +/* clang-format on */ -FIXTURE_VARIANT(hierarchy) { +FIXTURE_VARIANT(hierarchy) +{ const bool domain_both; const bool domain_parent; const bool domain_child; @@ -83,7 +87,9 @@ FIXTURE_VARIANT(hierarchy) { * \ P2 -> P1 : allow * 'P2 */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) { + /* clang-format on */ .domain_both = false, .domain_parent = false, .domain_child = false, @@ -98,7 +104,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) { * | P2 | * '------' */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) { + /* clang-format on */ .domain_both = false, .domain_parent = false, .domain_child = true, @@ -112,7 +120,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) { * ' * P2 */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) { + /* clang-format on */ .domain_both = false, .domain_parent = true, .domain_child = false, @@ -127,7 +137,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) { * | P2 | * '------' */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) { + /* clang-format on */ .domain_both = false, .domain_parent = true, .domain_child = true, @@ -142,7 +154,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) { * | P2 | * '-------------' */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) { + /* clang-format on */ .domain_both = true, .domain_parent = false, .domain_child = false, @@ -158,7 +172,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) { * | '------' | * '-----------------' */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) { + /* clang-format on */ .domain_both = true, .domain_parent = false, .domain_child = true, @@ -174,7 +190,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) { * | P2 | * '-----------------' */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) { + /* clang-format on */ .domain_both = true, .domain_parent = true, .domain_child = false, @@ -192,17 +210,21 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) { * | '------' | * '-----------------' */ +/* clang-format off */ FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) { + /* clang-format on */ .domain_both = true, .domain_parent = true, .domain_child = true, }; FIXTURE_SETUP(hierarchy) -{ } +{ +} FIXTURE_TEARDOWN(hierarchy) -{ } +{ +} /* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */ TEST_F(hierarchy, trace) @@ -330,7 +352,7 @@ TEST_F(hierarchy, trace) ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ASSERT_EQ(child, waitpid(child, &status, 0)); if (WIFSIGNALED(status) || !WIFEXITED(status) || - WEXITSTATUS(status) != EXIT_SUCCESS) + WEXITSTATUS(status) != EXIT_SUCCESS) _metadata->passed = 0; } diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 84fda3b49073..5c16159d0bcd 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -35,6 +35,7 @@ #include <sys/time.h> #include <sys/resource.h> #include <sys/stat.h> +#include <sys/param.h> #include <mqueue.h> #include <popt.h> #include <error.h> @@ -73,7 +74,6 @@ static char *usage = char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max"; char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max"; -#define min(a, b) ((a) < (b) ? (a) : (b)) #define MAX_CPUS 64 char *cpu_option_string; int cpus_to_pin[MAX_CPUS]; @@ -560,7 +560,7 @@ int main(int argc, char *argv[]) "require root in order to modify\nsystem settings. " "Exiting.\n"); - cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); + cpus_online = MIN(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); cpu_set = CPU_ALLOC(cpus_online); if (cpu_set == NULL) { perror("CPU_ALLOC()"); diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index c35ba24f994c..66d0414d8e4b 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -301,7 +301,7 @@ specify_qemu_cpus () { echo $2 -smp $3 ;; qemu-system-ppc64) - nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`" + nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`" echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt ;; esac diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 5f682fc892dd..88983cba7956 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -36,7 +36,7 @@ do then egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags files="$files $i.diags $i" - elif ! test -f ${scenariobasedir}/vmlinux + elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run" then echo No ${scenariobasedir}/vmlinux file > $i.diags files="$files $i.diags $i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 0a5419982ab3..0789c5606d2a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -33,7 +33,12 @@ do TORTURE_SUITE="`cat $i/../torture_suite`" configfile=`echo $i | sed -e 's,^.*/,,'` rm -f $i/console.log.*.diags - kvm-recheck-${TORTURE_SUITE}.sh $i + case "${TORTURE_SUITE}" in + X*) + ;; + *) + kvm-recheck-${TORTURE_SUITE}.sh $i + esac if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137 then echo QEMU error, output: diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 8c4c1e4792d0..0ff59bd8b640 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -138,14 +138,14 @@ chmod +x $T/bin/kvm-remote-*.sh # Check first to avoid the need for cleanup for system-name typos for i in $systems do - ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`" - echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log" + ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`" ret=$? if test "$ret" -ne 0 then echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log" exit 4 fi + echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log" done # Download and expand the tarball on all systems. @@ -153,14 +153,14 @@ echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log" for i in $systems do echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log" - cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" + cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -" ret=$? tries=0 while test "$ret" -ne 0 do echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log" sleep 60 - cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" + cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -" ret=$? if test "$ret" -ne 0 then @@ -185,7 +185,7 @@ checkremotefile () { while : do - ssh $1 "test -f \"$2\"" + ssh -o BatchMode=yes $1 "test -f \"$2\"" ret=$? if test "$ret" -eq 255 then @@ -228,7 +228,7 @@ startbatches () { then continue # System still running last test, skip. fi - ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2 + ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2 ret=$? if test "$ret" -ne 0 then @@ -267,7 +267,7 @@ do sleep 30 done echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" - ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 55b2c1533282..263e16aeca0e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -44,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" TORTURE_QEMU_MEM=512 +torture_qemu_mem_default=1 TORTURE_REMOTE= TORTURE_SHUTDOWN_GRACE=180 TORTURE_SUITE=rcu @@ -86,7 +87,7 @@ usage () { echo " --remote" echo " --results absolute-pathname" echo " --shutdown-grace seconds" - echo " --torture lock|rcu|rcuscale|refscale|scf" + echo " --torture lock|rcu|rcuscale|refscale|scf|X*" echo " --trust-make" exit 1 } @@ -180,6 +181,10 @@ do ;; --kasan) TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + if test -n "$torture_qemu_mem_default" + then + TORTURE_QEMU_MEM=2G + fi ;; --kconfig|--kconfigs) checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' @@ -202,6 +207,7 @@ do --memory) checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error TORTURE_QEMU_MEM=$2 + torture_qemu_mem_default= shift ;; --no-initrd) @@ -231,7 +237,7 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--' TORTURE_SUITE=$2 TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`" shift diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index bfe09e2829c8..d477618e7261 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -54,6 +54,7 @@ do_kvfree=yes do_kasan=yes do_kcsan=no do_clocksourcewd=yes +do_rt=yes # doyesno - Helper function for yes/no arguments function doyesno () { @@ -82,6 +83,7 @@ usage () { echo " --do-rcuscale / --do-no-rcuscale" echo " --do-rcutorture / --do-no-rcutorture" echo " --do-refscale / --do-no-refscale" + echo " --do-rt / --do-no-rt" echo " --do-scftorture / --do-no-scftorture" echo " --duration [ <minutes> | <hours>h | <days>d ]" echo " --kcsan-kmake-arg kernel-make-arguments" @@ -118,6 +120,7 @@ do do_scftorture=yes do_rcuscale=yes do_refscale=yes + do_rt=yes do_kvfree=yes do_kasan=yes do_kcsan=yes @@ -148,6 +151,7 @@ do do_scftorture=no do_rcuscale=no do_refscale=no + do_rt=no do_kvfree=no do_kasan=no do_kcsan=no @@ -162,6 +166,9 @@ do --do-refscale|--do-no-refscale) do_refscale=`doyesno "$1" --do-refscale` ;; + --do-rt|--do-no-rt) + do_rt=`doyesno "$1" --do-rt` + ;; --do-scftorture|--do-no-scftorture) do_scftorture=`doyesno "$1" --do-scftorture` ;; @@ -322,6 +329,7 @@ then echo " --- make clean" > "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1 + cp .config $amcdir make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 echo " --- make " >> "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 @@ -350,8 +358,19 @@ fi if test "$do_scftorture" = "yes" then - torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot" - torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make + torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1" + torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make +fi + +if test "$do_rt" = "yes" +then + # With all post-boot grace periods forced to normal. + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1" + torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + + # With all post-boot grace periods forced to expedited. + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1" + torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make fi if test "$do_refscale" = yes @@ -363,7 +382,7 @@ fi for prim in $primlist do torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make done if test "$do_rcuscale" = yes @@ -375,13 +394,13 @@ fi for prim in $primlist do torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make done if test "$do_kvfree" = "yes" then torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" - torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make + torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make fi if test "$do_clocksourcewd" = "yes" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 index 7093422050f6..6fd6acb94518 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 @@ -8,3 +8,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_RCU_EXPERT=y +CONFIG_FORCE_TASKS_RUDE_RCU=y +#CHECK#CONFIG_TASKS_RUDE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N index 2da8b49589a0..07f5e0a70ae7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N @@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_RCU_EXPERT=n +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 index 3ca112444ce7..d84801b9a7ae 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 @@ -7,4 +7,5 @@ CONFIG_PREEMPT=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y +CONFIG_TASKS_RCU=y CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 index ad2be91e5ee7..2f9fcffff5ae 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 @@ -2,3 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n +#CHECK#CONFIG_TASKS_RCU=y +CONFIG_FORCE_TASKS_RCU=y +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot index cd2a188eeb6d..b9b6d67cbc5f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot @@ -1 +1,2 @@ rcutorture.torture_type=tasks +rcutorture.stat_interval=60 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 index dc02083803ce..dea26c568678 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 @@ -7,3 +7,5 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y #CHECK#CONFIG_RCU_EXPERT=n +CONFIG_TASKS_RCU=y +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index e4d74e5fc1d0..85b407467454 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -4,8 +4,11 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n #CHECK#CONFIG_PROVE_RCU=n +CONFIG_FORCE_TASKS_TRACE_RCU=y +#CHECK#CONFIG_TASKS_TRACE_RCU=y CONFIG_TASKS_TRACE_RCU_READ_MB=y CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 index 77541eeb4e9f..093ea6e8e65c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 @@ -7,5 +7,7 @@ CONFIG_PREEMPT=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y +CONFIG_FORCE_TASKS_TRACE_RCU=y +#CHECK#CONFIG_TASKS_TRACE_RCU=y CONFIG_TASKS_TRACE_RCU_READ_MB=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 22ad0261728d..ae395981b5e5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -1,8 +1,9 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=y CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index 2789b47e4ecd..d30922d8c883 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -3,6 +3,7 @@ CONFIG_NR_CPUS=16 CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 index 8523a7515cbf..fc45645bb5f4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 @@ -13,3 +13,5 @@ CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n #CHECK#CONFIG_RCU_EXPERT=n +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 4a00539bfdd7..a323d8948b7c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -3,6 +3,7 @@ CONFIG_NR_CPUS=56 CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh index effa415f9b92..e2bc99c785e7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh @@ -9,7 +9,7 @@ # rcutorture_param_n_barrier_cbs bootparam-string # -# Adds n_barrier_cbs rcutorture module parameter to kernels having it. +# Adds n_barrier_cbs rcutorture module parameter if not already specified. rcutorture_param_n_barrier_cbs () { if echo $1 | grep -q "rcutorture\.n_barrier_cbs" then @@ -30,13 +30,25 @@ rcutorture_param_onoff () { fi } +# rcutorture_param_stat_interval bootparam-string +# +# Adds stat_interval rcutorture module parameter if not already specified. +rcutorture_param_stat_interval () { + if echo $1 | grep -q "rcutorture\.stat_interval" + then + : + else + echo rcutorture.stat_interval=15 + fi +} + # per_version_boot_params bootparam-string config-file seconds # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { echo $1 `rcutorture_param_onoff "$1" "$2"` \ `rcutorture_param_n_barrier_cbs "$1"` \ - rcutorture.stat_interval=15 \ + `rcutorture_param_stat_interval "$1"` \ rcutorture.shutdown_secs=$3 \ rcutorture.test_no_idle_hz=1 \ rcutorture.verbose=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon index 90942bb5bebc..6a00157bee5b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon @@ -1,5 +1,6 @@ CONFIG_RCU_SCALE_TEST=y CONFIG_PRINTK_TIME=y -CONFIG_TASKS_RCU_GENERIC=y -CONFIG_TASKS_RCU=y -CONFIG_TASKS_TRACE_RCU=y +CONFIG_FORCE_TASKS_RCU=y +#CHECK#CONFIG_TASKS_RCU=y +CONFIG_FORCE_TASKS_TRACE_RCU=y +#CHECK#CONFIG_TASKS_TRACE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE index f110d9ffbe4c..b10706fd03a4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE @@ -16,3 +16,5 @@ CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y CONFIG_RCU_TRACE=y +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon index a98b58b54bb1..fbea3b13baba 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon @@ -1,2 +1,6 @@ CONFIG_RCU_REF_SCALE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_FORCE_TASKS_RCU=y +#CHECK#CONFIG_TASKS_RCU=y +CONFIG_FORCE_TASKS_TRACE_RCU=y +#CHECK#CONFIG_TASKS_TRACE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT index 7f06838a91e6..ef2b501a6971 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT @@ -15,3 +15,5 @@ CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT index b8429d6c6ebc..3a59346b3de7 100644 --- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT @@ -7,3 +7,5 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT index ae4992b141b0..cb37e08037d6 100644 --- a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT +++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT @@ -7,3 +7,4 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh index d3d9e35d3d55..2d949e58f5a5 100644 --- a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh @@ -25,6 +25,5 @@ per_version_boot_params () { echo $1 `scftorture_param_onoff "$1" "$2"` \ scftorture.stat_interval=15 \ scftorture.shutdown_secs=$3 \ - scftorture.verbose=1 \ - scf + scftorture.verbose=1 } diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 6bcee2ec91a9..73d53257df42 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,17 +1,10 @@ -CC = $(CROSS_COMPILE)gcc -CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -SRCS=$(wildcard *.c) -OBJS=$(SRCS:.c=.o) - -all: resctrl_tests +# SPDX-License-Identifier: GPL-2.0 -$(OBJS): $(SRCS) - $(CC) $(CFLAGS) -c $(SRCS) +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 +CFLAGS += $(KHDR_INCLUDES) -resctrl_tests: $(OBJS) - $(CC) $(CFLAGS) -o $@ $^ +TEST_GEN_PROGS := resctrl_tests -.PHONY: clean +include ../lib.mk -clean: - $(RM) $(OBJS) resctrl_tests +$(OUTPUT)/resctrl_tests: $(wildcard *.c) diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README index 3d2bbd4fa3aa..8d11ce7c2ee5 100644 --- a/tools/testing/selftests/resctrl/README +++ b/tools/testing/selftests/resctrl/README @@ -12,24 +12,49 @@ Allocation test on Intel RDT hardware. More tests will be added in the future. And the test suit can be extended to cover AMD QoS and ARM MPAM hardware as well. +resctrl_tests can be run with or without kselftest framework. + +WITH KSELFTEST FRAMEWORK +======================= + BUILD ----- -Run "make" to build executable file "resctrl_tests". +Build executable file "resctrl_tests" from top level directory of the kernel source: + $ make -C tools/testing/selftests TARGETS=resctrl RUN --- -To use resctrl_tests, root or sudoer privileges are required. This is because -the test needs to mount resctrl file system and change contents in the file -system. +Run resctrl_tests as sudo or root since the test needs to mount resctrl file +system and change contents in the file system. +Using kselftest framework will run all supported tests within resctrl_tests: + + $ sudo make -C tools/testing/selftests TARGETS=resctrl run_tests + +More details about kselftest framework can be found in +Documentation/dev-tools/kselftest.rst. + +WITHOUT KSELFTEST FRAMEWORK +=========================== + +BUILD +----- + +Build executable file "resctrl_tests" from this directory(tools/testing/selftests/resctrl/): + $ make + +RUN +--- +Run resctrl_tests as sudo or root since the test needs to mount resctrl file +system and change contents in the file system. Executing the test without any parameter will run all supported tests: - sudo ./resctrl_tests + $ sudo ./resctrl_tests OVERVIEW OF EXECUTION ---------------------- +===================== A test case has four stages: @@ -41,7 +66,7 @@ A test case has four stages: - teardown: umount resctrl and clear temporary files. ARGUMENTS ---------- +========= Parameter '-h' shows usage information. diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index cd4f68388e0f..1c5e90c63254 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -89,7 +89,7 @@ static int check_results(struct resctrl_val_param *param) return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64, MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS, - !is_amd, false); + get_vendor() == ARCH_INTEL, false); } void cat_test_cleanup(void) diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index 51e5cf22632f..56ccbeae0638 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -121,8 +121,10 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, /* Consume read result so that reading memory is not optimized out. */ fp = fopen("/dev/null", "w"); - if (!fp) + if (!fp) { perror("Unable to write to /dev/null"); + return -1; + } fprintf(fp, "Sum: %d ", ret); fclose(fp); diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 1ad10c47e31d..f0ded31fb3c7 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -34,6 +34,9 @@ #define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON" #define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features" +#define ARCH_INTEL 1 +#define ARCH_AMD 2 + #define PARENT_EXIT(err_msg) \ do { \ perror(err_msg); \ @@ -75,8 +78,8 @@ struct resctrl_val_param { extern pid_t bm_pid, ppid; extern char llc_occup_path[1024]; -extern bool is_amd; +int get_vendor(void); bool check_resctrlfs_support(void); int filter_dmesg(void); int remount_resctrlfs(bool mum_resctrlfs); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 973f09a66e1e..df0d8d8526fc 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -13,25 +13,41 @@ #define BENCHMARK_ARGS 64 #define BENCHMARK_ARG_SIZE 64 -bool is_amd; - -void detect_amd(void) +static int detect_vendor(void) { FILE *inf = fopen("/proc/cpuinfo", "r"); + int vendor_id = 0; + char *s = NULL; char *res; if (!inf) - return; + return vendor_id; res = fgrep(inf, "vendor_id"); - if (res) { - char *s = strchr(res, ':'); + if (res) + s = strchr(res, ':'); + + if (s && !strcmp(s, ": GenuineIntel\n")) + vendor_id = ARCH_INTEL; + else if (s && !strcmp(s, ": AuthenticAMD\n")) + vendor_id = ARCH_AMD; - is_amd = s && !strcmp(s, ": AuthenticAMD\n"); - free(res); - } fclose(inf); + free(res); + return vendor_id; +} + +int get_vendor(void) +{ + static int vendor = -1; + + if (vendor == -1) + vendor = detect_vendor(); + if (vendor == 0) + ksft_print_msg("Can not get vendor info...\n"); + + return vendor; } static void cmd_help(void) @@ -70,6 +86,8 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span, sprintf(benchmark_cmd[5], "%s", MBA_STR); res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); ksft_test_result(!res, "MBM: bw change\n"); + if ((get_vendor() == ARCH_INTEL) && res) + ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); mbm_test_cleanup(); } @@ -106,6 +124,8 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no) sprintf(benchmark_cmd[5], "%s", CMT_STR); res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd); ksft_test_result(!res, "CMT: test\n"); + if ((get_vendor() == ARCH_INTEL) && res) + ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); cmt_test_cleanup(); } @@ -205,10 +225,7 @@ int main(int argc, char **argv) * 2. We execute perf commands */ if (geteuid() != 0) - return ksft_exit_fail_msg("Not running as root, abort testing.\n"); - - /* Detect AMD vendor */ - detect_amd(); + return ksft_exit_skip("Not running as root. Skipping...\n"); if (has_ben) { /* Extract benchmark command from command line. */ @@ -235,16 +252,16 @@ int main(int argc, char **argv) sprintf(bm_type, "fill_buf"); if (!check_resctrlfs_support()) - return ksft_exit_fail_msg("resctrl FS does not exist\n"); + return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); filter_dmesg(); ksft_set_plan(tests ? : 4); - if (!is_amd && mbm_test) + if ((get_vendor() == ARCH_INTEL) && mbm_test) run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); - if (!is_amd && mba_test) + if ((get_vendor() == ARCH_INTEL) && mba_test) run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); if (cmt_test) diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 95224345c78e..b32b96356ec7 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -678,6 +678,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) sigemptyset(&sigact.sa_mask); sigact.sa_flags = SA_SIGINFO; if (sigaction(SIGINT, &sigact, NULL) || + sigaction(SIGTERM, &sigact, NULL) || sigaction(SIGHUP, &sigact, NULL)) { perror("# sigaction"); ret = errno; diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 5f5a166ade60..6f543e470ad4 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -106,7 +106,7 @@ int get_resource_id(int cpu_no, int *resource_id) char phys_pkg_path[1024]; FILE *fp; - if (is_amd) + if (get_vendor() == ARCH_AMD) sprintf(phys_pkg_path, "%s%d/cache/index3/id", PHYS_ID_PATH, cpu_no); else diff --git a/tools/testing/selftests/resctrl/settings b/tools/testing/selftests/resctrl/settings new file mode 100644 index 000000000000..a383f3d4565b --- /dev/null +++ b/tools/testing/selftests/resctrl/settings @@ -0,0 +1,3 @@ +# If running time is longer than 120 seconds when new tests are added in +# the future, increase timeout here. +timeout=120 diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index 585f7a0c10cb..f017c382c036 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/ LDFLAGS += -lpthread +LDLIBS += -lcap TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark include ../lib.mk diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 313bb0cbfb1e..136df5b76319 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -46,6 +46,7 @@ #include <sys/ioctl.h> #include <linux/kcmp.h> #include <sys/resource.h> +#include <sys/capability.h> #include <unistd.h> #include <sys/syscall.h> @@ -59,6 +60,8 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) + #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 #endif @@ -268,6 +271,10 @@ struct seccomp_notif_addfd_big { #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) #endif +#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV +#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -3742,7 +3749,10 @@ TEST(user_notification_fault_recv) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; - ASSERT_EQ(unshare(CLONE_NEWUSER), 0); + ASSERT_EQ(unshare(CLONE_NEWUSER), 0) { + if (errno == EINVAL) + SKIP(return, "kernel missing CLONE_NEWUSER support"); + } listener = user_notif_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); @@ -4231,6 +4241,421 @@ TEST(user_notification_addfd_rlimit) close(memfd); } +/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ +FIXTURE(O_SUSPEND_SECCOMP) { + pid_t pid; +}; + +FIXTURE_SETUP(O_SUSPEND_SECCOMP) +{ + ERRNO_FILTER(block_read, E2BIG); + cap_value_t cap_list[] = { CAP_SYS_ADMIN }; + cap_t caps; + + self->pid = 0; + + /* make sure we don't have CAP_SYS_ADMIN */ + caps = cap_get_proc(); + ASSERT_NE(NULL, caps); + ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); + ASSERT_EQ(0, cap_set_proc(caps)); + cap_free(caps); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read)); + + self->pid = fork(); + ASSERT_GE(self->pid, 0); + + if (self->pid == 0) { + while (1) + pause(); + _exit(127); + } +} + +FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP) +{ + if (self->pid) + kill(self->pid, SIGKILL); +} + +TEST_F(O_SUSPEND_SECCOMP, setoptions) +{ + int wstatus; + + ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0)); + ASSERT_EQ(self->pid, wait(&wstatus)); + ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP)); + if (errno == EINVAL) + SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); + ASSERT_EQ(EPERM, errno); +} + +TEST_F(O_SUSPEND_SECCOMP, seize) +{ + int ret; + + ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP); + ASSERT_EQ(-1, ret); + if (errno == EINVAL) + SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); + ASSERT_EQ(EPERM, errno); +} + +/* + * get_nth - Get the nth, space separated entry in a file. + * + * Returns the length of the read field. + * Throws error if field is zero-lengthed. + */ +static ssize_t get_nth(struct __test_metadata *_metadata, const char *path, + const unsigned int position, char **entry) +{ + char *line = NULL; + unsigned int i; + ssize_t nread; + size_t len = 0; + FILE *f; + + f = fopen(path, "r"); + ASSERT_NE(f, NULL) { + TH_LOG("Could not open %s: %s", path, strerror(errno)); + } + + for (i = 0; i < position; i++) { + nread = getdelim(&line, &len, ' ', f); + ASSERT_GE(nread, 0) { + TH_LOG("Failed to read %d entry in file %s", i, path); + } + } + fclose(f); + + ASSERT_GT(nread, 0) { + TH_LOG("Entry in file %s had zero length", path); + } + + *entry = line; + return nread - 1; +} + +/* For a given PID, get the task state (D, R, etc...) */ +static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid) +{ + char proc_path[100] = {0}; + char status; + char *line; + + snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid); + ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1); + + status = *line; + free(line); + + return status; +} + +TEST(user_notification_fifo) +{ + struct seccomp_notif_resp resp = {}; + struct seccomp_notif req = {}; + int i, status, listener; + pid_t pid, pids[3]; + __u64 baseid; + long ret; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Setup a listener */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = syscall(__NR_getppid); + exit(ret != USER_NOTIF_MAGIC); + } + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + baseid = req.id + 1; + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + /* check that we make sure flags == 0 */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* Start children, and generate notifications */ + for (i = 0; i < ARRAY_SIZE(pids); i++) { + pid = fork(); + if (pid == 0) { + ret = syscall(__NR_getppid); + exit(ret != USER_NOTIF_MAGIC); + } + pids[i] = pid; + } + + /* This spins until all of the children are sleeping */ +restart_wait: + for (i = 0; i < ARRAY_SIZE(pids); i++) { + if (get_proc_stat(_metadata, pids[i]) != 'S') { + nanosleep(&delay, NULL); + goto restart_wait; + } + } + + /* Read the notifications in order (and respond) */ + for (i = 0; i < ARRAY_SIZE(pids); i++) { + memset(&req, 0, sizeof(req)); + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + EXPECT_EQ(req.id, baseid + i); + resp.id = req.id; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + } + + /* Make sure notifications were received */ + for (i = 0; i < ARRAY_SIZE(pids); i++) { + EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + } +} + +/* get_proc_syscall - Get the syscall in progress for a given pid + * + * Returns the current syscall number for a given process + * Returns -1 if not in syscall (running or blocked) + */ +static long get_proc_syscall(struct __test_metadata *_metadata, int pid) +{ + char proc_path[100] = {0}; + long ret = -1; + ssize_t nread; + char *line; + + snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid); + nread = get_nth(_metadata, proc_path, 1, &line); + ASSERT_GT(nread, 0); + + if (!strncmp("running", line, MIN(7, nread))) + ret = strtol(line, NULL, 16); + + free(line); + return ret; +} + +/* Ensure non-fatal signals prior to receive are unmodified */ +TEST(user_notification_wait_killable_pre_notification) +{ + struct sigaction new_action = { + .sa_handler = signal_handler, + }; + int listener, status, sk_pair[2]; + pid_t pid; + long ret; + char c; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) + { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); + + listener = user_notif_syscall( + __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); + ASSERT_GE(listener, 0); + + /* + * Check that we can kill the process with SIGUSR1 prior to receiving + * the notification. SIGUSR1 is wired up to a custom signal handler, + * and make sure it gets called. + */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + close(sk_pair[0]); + handled = sk_pair[1]; + + /* Setup the non-fatal sigaction without SA_RESTART */ + if (sigaction(SIGUSR1, &new_action, NULL)) { + perror("sigaction"); + exit(1); + } + + ret = syscall(__NR_getppid); + /* Make sure we got a return from a signal interruption */ + exit(ret != -1 || errno != EINTR); + } + + /* + * Make sure we've gotten to the seccomp user notification wait + * from getppid prior to sending any signals + */ + while (get_proc_syscall(_metadata, pid) != __NR_getppid && + get_proc_stat(_metadata, pid) != 'S') + nanosleep(&delay, NULL); + + /* Send non-fatal kill signal */ + EXPECT_EQ(kill(pid, SIGUSR1), 0); + + /* wait for process to exit (exit checks for EINTR) */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + EXPECT_EQ(read(sk_pair[0], &c, 1), 1); +} + +/* Ensure non-fatal signals after receive are blocked */ +TEST(user_notification_wait_killable) +{ + struct sigaction new_action = { + .sa_handler = signal_handler, + }; + struct seccomp_notif_resp resp = {}; + struct seccomp_notif req = {}; + int listener, status, sk_pair[2]; + pid_t pid; + long ret; + char c; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) + { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); + + listener = user_notif_syscall( + __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + close(sk_pair[0]); + handled = sk_pair[1]; + + /* Setup the sigaction without SA_RESTART */ + if (sigaction(SIGUSR1, &new_action, NULL)) { + perror("sigaction"); + exit(1); + } + + /* Make sure that the syscall is completed (no EINTR) */ + ret = syscall(__NR_getppid); + exit(ret != USER_NOTIF_MAGIC); + } + + /* + * Get the notification, to make move the notifying process into a + * non-preemptible (TASK_KILLABLE) state. + */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + /* Send non-fatal kill signal */ + EXPECT_EQ(kill(pid, SIGUSR1), 0); + + /* + * Make sure the task enters moves to TASK_KILLABLE by waiting for + * D (Disk Sleep) state after receiving non-fatal signal. + */ + while (get_proc_stat(_metadata, pid) != 'D') + nanosleep(&delay, NULL); + + resp.id = req.id; + resp.val = USER_NOTIF_MAGIC; + /* Make sure the notification is found and able to be replied to */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* + * Make sure that the signal handler does get called once we're back in + * userspace. + */ + EXPECT_EQ(read(sk_pair[0], &c, 1), 1); + /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +/* Ensure fatal signals after receive are not blocked */ +TEST(user_notification_wait_killable_fatal) +{ + struct seccomp_notif req = {}; + int listener, status; + pid_t pid; + long ret; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) + { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall( + __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* This should never complete as it should get a SIGTERM */ + syscall(__NR_getppid); + exit(1); + } + + while (get_proc_stat(_metadata, pid) != 'S') + nanosleep(&delay, NULL); + + /* + * Get the notification, to make move the notifying process into a + * non-preemptible (TASK_KILLABLE) state. + */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + /* Kill the process with a fatal signal */ + EXPECT_EQ(kill(pid, SIGTERM), 0); + + /* + * Wait for the process to exit, and make sure the process terminated + * due to the SIGTERM signal. + */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFSIGNALED(status)); + EXPECT_EQ(SIGTERM, WTERMSIG(status)); +} + /* * TODO: * - expand NNP testing diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index e4a4ce2b826d..b078ce9c6d2a 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -80,19 +80,6 @@ static inline void __write_pkey_reg(u64 pkey_reg) assert(pkey_reg == __read_pkey_reg()); } -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ #define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ @@ -104,9 +91,7 @@ static inline int cpu_has_pkeys(void) unsigned int ecx; unsigned int edx; - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx); if (!(ecx & X86_FEATURE_PKU)) { dprintf2("cpu does not have PKU\n"); @@ -142,9 +127,7 @@ int pkey_reg_xstate_offset(void) /* assume that XSTATE_PKEY is set in XCR0 */ leaf = XSTATE_PKEY_BIT; { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(XSTATE_CPUID, leaf, eax, ebx, ecx, edx); if (leaf == XSTATE_PKEY_BIT) { xstate_offset = ebx; diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 2189f0322d8b..625e42901237 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -17,6 +17,8 @@ #include <sys/syscall.h> #include <sys/wait.h> +#include "../kselftest.h" /* For __cpuid_count() */ + #ifndef __x86_64__ # error This test is 64-bit only #endif @@ -45,13 +47,6 @@ static inline uint64_t xgetbv(uint32_t index) return eax + ((uint64_t)edx << 32); } -static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) -{ - asm volatile("cpuid;" - : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) { uint32_t rfbm_lo = rfbm; @@ -115,9 +110,7 @@ static inline void check_cpuid_xsave(void) * support for the XSAVE feature set, including * XGETBV. */ - eax = 1; - ecx = 0; - cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(1, 0, eax, ebx, ecx, edx); if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) fatal_error("cpuid: no CPU xsave support"); if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) @@ -140,9 +133,8 @@ static void check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx; - eax = CPUID_LEAF_XSTATE; - ecx = CPUID_SUBLEAF_XSTATE_USER; - cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER, + eax, ebx, ecx, edx); /* * EBX enumerates the size (in bytes) required by the XSAVE @@ -153,10 +145,8 @@ static void check_cpuid_xtiledata(void) */ xbuf_size = ebx; - eax = CPUID_LEAF_XSTATE; - ecx = XFEATURE_XTILEDATA; - - cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA, + eax, ebx, ecx, edx); /* * eax: XTILEDATA state component size * ebx: XTILEDATA state component offset in user buffer diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c index ab8599c10ce5..cf9ce8fbb656 100644 --- a/tools/testing/selftests/x86/corrupt_xstate_header.c +++ b/tools/testing/selftests/x86/corrupt_xstate_header.c @@ -17,25 +17,13 @@ #include <stdint.h> #include <sys/wait.h> -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} +#include "../kselftest.h" /* For __cpuid_count() */ static inline int xsave_enabled(void) { unsigned int eax, ebx, ecx, edx; - eax = 0x1; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(0x1, 0x0, eax, ebx, ecx, edx); /* Is CR4.OSXSAVE enabled ? */ return ecx & (1U << 27); diff --git a/tools/thermal/lib/Build b/tools/thermal/lib/Build new file mode 100644 index 000000000000..06f22760a272 --- /dev/null +++ b/tools/thermal/lib/Build @@ -0,0 +1,3 @@ +libthermal_tools-y += mainloop.o +libthermal_tools-y += log.o +libthermal_tools-y += uptimeofday.o diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile new file mode 100644 index 000000000000..82db451935c5 --- /dev/null +++ b/tools/thermal/lib/Makefile @@ -0,0 +1,158 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Most of this file is copied from tools/lib/perf/Makefile + +LIBTHERMAL_TOOLS_VERSION = 0 +LIBTHERMAL_TOOLS_PATCHLEVEL = 0 +LIBTHERMAL_TOOLS_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +include $(srctree)/tools/scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.arch + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +INCLUDES = \ +-I/usr/include/libnl3 \ +-I$(srctree)/tools/lib/thermal/include \ +-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(INCLUDES) +override CFGLAS += -Wl,-L. +override CFGLAS += -Wl,-lthermal + +all: + +export srctree OUTPUT CC LD CFLAGS V +export DESTDIR DESTDIR_SQ + +include $(srctree)/tools/build/Makefile.include + +PATCHLEVEL = $(LIBTHERMAL_TOOLS_PATCHLEVEL) +EXTRAVERSION = $(LIBTHERMAL_TOOLS_EXTRAVERSION) +VERSION = $(LIBTHERMAL_TOOLS_VERSION).$(LIBTHERMAL_TOOLS_PATCHLEVEL).$(LIBTHERMAL_TOOLS_EXTRAVERSION) + +LIBTHERMAL_TOOLS_SO := $(OUTPUT)libthermal_tools.so.$(VERSION) +LIBTHERMAL_TOOLS_A := $(OUTPUT)libthermal_tools.a +LIBTHERMAL_TOOLS_IN := $(OUTPUT)libthermal_tools-in.o +LIBTHERMAL_TOOLS_PC := $(OUTPUT)libthermal_tools.pc + +LIBTHERMAL_TOOLS_ALL := $(LIBTHERMAL_TOOLS_A) $(OUTPUT)libthermal_tools.so* + +$(LIBTHERMAL_TOOLS_IN): FORCE + $(Q)$(MAKE) $(build)=libthermal_tools + +$(LIBTHERMAL_TOOLS_A): $(LIBTHERMAL_TOOLS_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_TOOLS_IN) + +$(LIBTHERMAL_TOOLS_SO): $(LIBTHERMAL_TOOLS_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal_tools.so $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libthermal_tools.so + @ln -sf $(@F) $(OUTPUT)libthermal_tools.so.$(LIBTHERMAL_TOOLS_VERSION) + + +libs: $(LIBTHERMAL_TOOLS_A) $(LIBTHERMAL_TOOLS_SO) $(LIBTHERMAL_TOOLS_PC) + +all: fixdep + $(Q)$(MAKE) libs + +clean: + $(call QUIET_CLEAN, libthermal_tools) $(RM) $(LIBTHERMAL_TOOLS_A) \ + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_TOOLS_VERSION) .*.d .*.cmd LIBTHERMAL_TOOLS-CFLAGS $(LIBTHERMAL_TOOLS_PC) + +$(LIBTHERMAL_TOOLS_PC): + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(VERSION)|" \ + < libthermal_tools.pc.template > $@ + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: libs + $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_ALL)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBTHERMAL_TOOLS_ALL) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \ + +install_pkgconfig: $(LIBTHERMAL_TOOLS_PC) + $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_PC)) \ + $(call do_install,$(LIBTHERMAL_TOOLS_PC),$(libdir_SQ)/pkgconfig,644) + +install_doc: + $(Q)$(MAKE) -C Documentation install-man install-html install-examples + +#install: install_lib install_headers install_pkgconfig install_doc +install: install_lib install_headers install_pkgconfig + +FORCE: + +.PHONY: all install clean FORCE diff --git a/tools/thermal/lib/libthermal_tools.pc.template b/tools/thermal/lib/libthermal_tools.pc.template new file mode 100644 index 000000000000..6f3769731b59 --- /dev/null +++ b/tools/thermal/lib/libthermal_tools.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libthermal +Description: thermal library +Requires: libnl-3.0 libnl-genl-3.0 +Version: @VERSION@ +Libs: -L${libdir} -lnl-genl-3 -lnl-3 +Cflags: -I${includedir} -I{include}/libnl3 diff --git a/tools/thermal/lib/log.c b/tools/thermal/lib/log.c new file mode 100644 index 000000000000..597d6e7f7858 --- /dev/null +++ b/tools/thermal/lib/log.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <syslog.h> +#include "log.h" + +static const char *__ident = "unknown"; +static int __options; + +static const char * const loglvl[] = { + [LOG_DEBUG] = "DEBUG", + [LOG_INFO] = "INFO", + [LOG_NOTICE] = "NOTICE", + [LOG_WARNING] = "WARN", + [LOG_ERR] = "ERROR", + [LOG_CRIT] = "CRITICAL", + [LOG_ALERT] = "ALERT", + [LOG_EMERG] = "EMERG", +}; + +int log_str2level(const char *lvl) +{ + int i; + + for (i = 0; i < sizeof(loglvl) / sizeof(loglvl[LOG_DEBUG]); i++) + if (!strcmp(lvl, loglvl[i])) + return i; + + return LOG_DEBUG; +} + +extern void logit(int level, const char *format, ...) +{ + va_list args; + + va_start(args, format); + + if (__options & TO_SYSLOG) + vsyslog(level, format, args); + + if (__options & TO_STDERR) + vfprintf(stderr, format, args); + + if (__options & TO_STDOUT) + vfprintf(stdout, format, args); + + va_end(args); +} + +int log_init(int level, const char *ident, int options) +{ + if (!options) + return -1; + + if (level > LOG_DEBUG) + return -1; + + if (!ident) + return -1; + + __ident = ident; + __options = options; + + if (options & TO_SYSLOG) { + openlog(__ident, options | LOG_NDELAY, LOG_USER); + setlogmask(LOG_UPTO(level)); + } + + return 0; +} + +void log_exit(void) +{ + closelog(); +} diff --git a/tools/thermal/lib/log.h b/tools/thermal/lib/log.h new file mode 100644 index 000000000000..be8ab5144938 --- /dev/null +++ b/tools/thermal/lib/log.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS_LOG_H +#define __THERMAL_TOOLS_LOG_H + +#include <syslog.h> + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + +#define TO_SYSLOG 0x1 +#define TO_STDOUT 0x2 +#define TO_STDERR 0x4 + +extern void logit(int level, const char *format, ...); + +#define DEBUG(fmt, ...) logit(LOG_DEBUG, "%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__) +#define INFO(fmt, ...) logit(LOG_INFO, fmt, ##__VA_ARGS__) +#define NOTICE(fmt, ...) logit(LOG_NOTICE, fmt, ##__VA_ARGS__) +#define WARN(fmt, ...) logit(LOG_WARNING, fmt, ##__VA_ARGS__) +#define ERROR(fmt, ...) logit(LOG_ERR, fmt, ##__VA_ARGS__) +#define CRITICAL(fmt, ...) logit(LOG_CRIT, fmt, ##__VA_ARGS__) +#define ALERT(fmt, ...) logit(LOG_ALERT, fmt, ##__VA_ARGS__) +#define EMERG(fmt, ...) logit(LOG_EMERG, fmt, ##__VA_ARGS__) + +int log_init(int level, const char *ident, int options); +int log_str2level(const char *lvl); +void log_exit(void); + +#endif diff --git a/tools/thermal/lib/mainloop.c b/tools/thermal/lib/mainloop.c new file mode 100644 index 000000000000..94cbbcbd1c14 --- /dev/null +++ b/tools/thermal/lib/mainloop.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <sys/epoll.h> +#include "mainloop.h" +#include "log.h" + +static int epfd = -1; +static unsigned short nrhandler; +static sig_atomic_t exit_mainloop; + +struct mainloop_data { + mainloop_callback_t cb; + void *data; + int fd; +}; + +static struct mainloop_data **mds; + +#define MAX_EVENTS 10 + +int mainloop(unsigned int timeout) +{ + int i, nfds; + struct epoll_event events[MAX_EVENTS]; + struct mainloop_data *md; + + if (epfd < 0) + return -1; + + for (;;) { + + nfds = epoll_wait(epfd, events, MAX_EVENTS, timeout); + + if (exit_mainloop || !nfds) + return 0; + + if (nfds < 0) { + if (errno == EINTR) + continue; + return -1; + } + + for (i = 0; i < nfds; i++) { + md = events[i].data.ptr; + + if (md->cb(md->fd, md->data) > 0) + return 0; + } + } +} + +int mainloop_add(int fd, mainloop_callback_t cb, void *data) +{ + struct epoll_event ev = { + .events = EPOLLIN, + }; + + struct mainloop_data *md; + + if (fd >= nrhandler) { + mds = realloc(mds, sizeof(*mds) * (fd + 1)); + if (!mds) + return -1; + nrhandler = fd + 1; + } + + md = malloc(sizeof(*md)); + if (!md) + return -1; + + md->data = data; + md->cb = cb; + md->fd = fd; + + mds[fd] = md; + ev.data.ptr = md; + + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) { + free(md); + return -1; + } + + return 0; +} + +int mainloop_del(int fd) +{ + if (fd >= nrhandler) + return -1; + + if (epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL) < 0) + return -1; + + free(mds[fd]); + + return 0; +} + +int mainloop_init(void) +{ + epfd = epoll_create(2); + if (epfd < 0) + return -1; + + return 0; +} + +void mainloop_exit(void) +{ + exit_mainloop = 1; +} + +void mainloop_fini(void) +{ + close(epfd); +} diff --git a/tools/thermal/lib/mainloop.h b/tools/thermal/lib/mainloop.h new file mode 100644 index 000000000000..89b61e89d905 --- /dev/null +++ b/tools/thermal/lib/mainloop.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS_MAINLOOP_H +#define __THERMAL_TOOLS_MAINLOOP_H + +typedef int (*mainloop_callback_t)(int fd, void *data); + +extern int mainloop(unsigned int timeout); +extern int mainloop_add(int fd, mainloop_callback_t cb, void *data); +extern int mainloop_del(int fd); +extern void mainloop_exit(void); +extern int mainloop_init(void); +extern void mainloop_fini(void); + +#endif diff --git a/tools/thermal/lib/thermal-tools.h b/tools/thermal/lib/thermal-tools.h new file mode 100644 index 000000000000..f43939a468a3 --- /dev/null +++ b/tools/thermal/lib/thermal-tools.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS +#define __THERMAL_TOOLS + +#include "log.h" +#include "mainloop.h" +#include "uptimeofday.h" + +#endif diff --git a/tools/thermal/lib/uptimeofday.c b/tools/thermal/lib/uptimeofday.c new file mode 100644 index 000000000000..dacb02956a68 --- /dev/null +++ b/tools/thermal/lib/uptimeofday.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdio.h> +#include <sys/time.h> +#include <linux/sysinfo.h> +#include "thermal-tools.h" + +static unsigned long __offset; +static struct timeval __tv; + +int uptimeofday_init(void) +{ + struct sysinfo info; + + if (sysinfo(&info)) + return -1; + + gettimeofday(&__tv, NULL); + + __offset = __tv.tv_sec - info.uptime; + + return 0; +} + +unsigned long getuptimeofday_ms(void) +{ + gettimeofday(&__tv, NULL); + + return ((__tv.tv_sec - __offset) * 1000) + (__tv.tv_usec / 1000); +} + +struct timespec msec_to_timespec(int msec) +{ + struct timespec tv = { + .tv_sec = (msec / 1000), + .tv_nsec = (msec % 1000) * 1000000, + }; + + return tv; +} diff --git a/tools/thermal/lib/uptimeofday.h b/tools/thermal/lib/uptimeofday.h new file mode 100644 index 000000000000..c0da5de41325 --- /dev/null +++ b/tools/thermal/lib/uptimeofday.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS_UPTIMEOFDAY_H +#define __THERMAL_TOOLS_UPTIMEOFDAY_H +#include <sys/sysinfo.h> +#include <sys/time.h> + +int uptimeofday_init(void); +unsigned long getuptimeofday_ms(void); +struct timespec msec_to_timespec(int msec); + +#endif diff --git a/tools/thermal/thermal-engine/Build b/tools/thermal/thermal-engine/Build new file mode 100644 index 000000000000..20c3c478b88d --- /dev/null +++ b/tools/thermal/thermal-engine/Build @@ -0,0 +1 @@ +thermal-engine-y += thermal-engine.o diff --git a/tools/thermal/thermal-engine/Makefile b/tools/thermal/thermal-engine/Makefile new file mode 100644 index 000000000000..6bd05ff89485 --- /dev/null +++ b/tools/thermal/thermal-engine/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for thermal tools + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +CFLAGS = -Wall -Wextra +CFLAGS += -I$(srctree)/tools/thermal/lib +CFLAGS += -I$(srctree)/tools/lib/thermal/include + +LDFLAGS = -L$(srctree)/tools/thermal/lib +LDFLAGS += -L$(srctree)/tools/lib/thermal +LDFLAGS += -lthermal_tools +LDFLAGS += -lthermal +LDFLAGS += -lconfig +LDFLAGS += -lnl-genl-3 -lnl-3 + +VERSION = 0.0.1 + +all: thermal-engine +%: %.c + $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS) +clean: + $(RM) thermal-engine diff --git a/tools/thermal/thermal-engine/thermal-engine.c b/tools/thermal/thermal-engine/thermal-engine.c new file mode 100644 index 000000000000..9b1476a2680f --- /dev/null +++ b/tools/thermal/thermal-engine/thermal-engine.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Thermal monitoring tool based on the thermal netlink events. + * + * Copyright (C) 2022 Linaro Ltd. + * + * Author: Daniel Lezcano <daniel.lezcano@kernel.org> + */ +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <libgen.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include <syslog.h> + +#include <sys/epoll.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <thermal.h> +#include "thermal-tools.h" + +struct options { + int loglevel; + int logopt; + int interactive; + int daemonize; +}; + +struct thermal_data { + struct thermal_zone *tz; + struct thermal_handler *th; +}; + +static int show_trip(struct thermal_trip *tt, __maybe_unused void *arg) +{ + INFO("trip id=%d, type=%d, temp=%d, hyst=%d\n", + tt->id, tt->type, tt->temp, tt->hyst); + + return 0; +} + +static int show_temp(struct thermal_zone *tz, __maybe_unused void *arg) +{ + thermal_cmd_get_temp(arg, tz); + + INFO("temperature: %d\n", tz->temp); + + return 0; +} + +static int show_governor(struct thermal_zone *tz, __maybe_unused void *arg) +{ + thermal_cmd_get_governor(arg, tz); + + INFO("governor: '%s'\n", tz->governor); + + return 0; +} + +static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg) +{ + INFO("thermal zone '%s', id=%d\n", tz->name, tz->id); + + for_each_thermal_trip(tz->trip, show_trip, NULL); + + show_temp(tz, arg); + + show_governor(tz, arg); + + return 0; +} + +static int tz_create(const char *name, int tz_id, __maybe_unused void *arg) +{ + INFO("Thermal zone '%s'/%d created\n", name, tz_id); + + return 0; +} + +static int tz_delete(int tz_id, __maybe_unused void *arg) +{ + INFO("Thermal zone %d deleted\n", tz_id); + + return 0; +} + +static int tz_disable(int tz_id, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s') disabled\n", tz_id, tz->name); + + return 0; +} + +static int tz_enable(int tz_id, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s') enabled\n", tz_id, tz->name); + + return 0; +} + +static int trip_high(int tz_id, int trip_id, int temp, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s'): trip point %d crossed way up with %d °C\n", + tz_id, tz->name, trip_id, temp); + + return 0; +} + +static int trip_low(int tz_id, int trip_id, int temp, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s'): trip point %d crossed way down with %d °C\n", + tz_id, tz->name, trip_id, temp); + + return 0; +} + +static int trip_add(int tz_id, int trip_id, int type, int temp, int hyst, __maybe_unused void *arg) +{ + INFO("Trip point added %d: id=%d, type=%d, temp=%d, hyst=%d\n", + tz_id, trip_id, type, temp, hyst); + + return 0; +} + +static int trip_delete(int tz_id, int trip_id, __maybe_unused void *arg) +{ + INFO("Trip point deleted %d: id=%d\n", tz_id, trip_id); + + return 0; +} + +static int trip_change(int tz_id, int trip_id, int type, int temp, + int hyst, __maybe_unused void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Trip point changed %d: id=%d, type=%d, temp=%d, hyst=%d\n", + tz_id, trip_id, type, temp, hyst); + + tz->trip[trip_id].type = type; + tz->trip[trip_id].temp = temp; + tz->trip[trip_id].hyst = hyst; + + return 0; +} + +static int cdev_add(const char *name, int cdev_id, int max_state, __maybe_unused void *arg) +{ + INFO("Cooling device '%s'/%d (max state=%d) added\n", name, cdev_id, max_state); + + return 0; +} + +static int cdev_delete(int cdev_id, __maybe_unused void *arg) +{ + INFO("Cooling device %d deleted", cdev_id); + + return 0; +} + +static int cdev_update(int cdev_id, int cur_state, __maybe_unused void *arg) +{ + INFO("cdev:%d state:%d\n", cdev_id, cur_state); + + return 0; +} + +static int gov_change(int tz_id, const char *name, __maybe_unused void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("%s: governor changed %s -> %s\n", tz->name, tz->governor, name); + + strcpy(tz->governor, name); + + return 0; +} + +static struct thermal_ops ops = { + .events.tz_create = tz_create, + .events.tz_delete = tz_delete, + .events.tz_disable = tz_disable, + .events.tz_enable = tz_enable, + .events.trip_high = trip_high, + .events.trip_low = trip_low, + .events.trip_add = trip_add, + .events.trip_delete = trip_delete, + .events.trip_change = trip_change, + .events.cdev_add = cdev_add, + .events.cdev_delete = cdev_delete, + .events.cdev_update = cdev_update, + .events.gov_change = gov_change +}; + +static int thermal_event(__maybe_unused int fd, __maybe_unused void *arg) +{ + struct thermal_data *td = arg; + + return thermal_events_handle(td->th, td); +} + +static void usage(const char *cmd) +{ + printf("%s : A thermal monitoring engine based on notifications\n", cmd); + printf("Usage: %s [options]\n", cmd); + printf("\t-h, --help\t\tthis help\n"); + printf("\t-d, --daemonize\n"); + printf("\t-l <level>, --loglevel <level>\tlog level: "); + printf("DEBUG, INFO, NOTICE, WARN, ERROR\n"); + printf("\t-s, --syslog\t\toutput to syslog\n"); + printf("\n"); + exit(0); +} + +static int options_init(int argc, char *argv[], struct options *options) +{ + int opt; + + struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "daemonize", no_argument, NULL, 'd' }, + { "syslog", no_argument, NULL, 's' }, + { "loglevel", required_argument, NULL, 'l' }, + { 0, 0, 0, 0 } + }; + + while (1) { + + int optindex = 0; + + opt = getopt_long(argc, argv, "l:dhs", long_options, &optindex); + if (opt == -1) + break; + + switch (opt) { + case 'l': + options->loglevel = log_str2level(optarg); + break; + case 'd': + options->daemonize = 1; + break; + case 's': + options->logopt = TO_SYSLOG; + break; + case 'h': + usage(basename(argv[0])); + break; + default: /* '?' */ + return -1; + } + } + + return 0; +} + +enum { + THERMAL_ENGINE_SUCCESS = 0, + THERMAL_ENGINE_OPTION_ERROR, + THERMAL_ENGINE_DAEMON_ERROR, + THERMAL_ENGINE_LOG_ERROR, + THERMAL_ENGINE_THERMAL_ERROR, + THERMAL_ENGINE_MAINLOOP_ERROR, +}; + +int main(int argc, char *argv[]) +{ + struct thermal_data td; + struct options options = { + .loglevel = LOG_INFO, + .logopt = TO_STDOUT, + }; + + if (options_init(argc, argv, &options)) { + ERROR("Usage: %s --help\n", argv[0]); + return THERMAL_ENGINE_OPTION_ERROR; + } + + if (options.daemonize && daemon(0, 0)) { + ERROR("Failed to daemonize: %p\n"); + return THERMAL_ENGINE_DAEMON_ERROR; + } + + if (log_init(options.loglevel, basename(argv[0]), options.logopt)) { + ERROR("Failed to initialize logging facility\n"); + return THERMAL_ENGINE_LOG_ERROR; + } + + td.th = thermal_init(&ops); + if (!td.th) { + ERROR("Failed to initialize the thermal library\n"); + return THERMAL_ENGINE_THERMAL_ERROR; + } + + td.tz = thermal_zone_discover(td.th); + if (!td.tz) { + ERROR("No thermal zone available\n"); + return THERMAL_ENGINE_THERMAL_ERROR; + } + + for_each_thermal_zone(td.tz, show_tz, td.th); + + if (mainloop_init()) { + ERROR("Failed to initialize the mainloop\n"); + return THERMAL_ENGINE_MAINLOOP_ERROR; + } + + if (mainloop_add(thermal_events_fd(td.th), thermal_event, &td)) { + ERROR("Failed to setup the mainloop\n"); + return THERMAL_ENGINE_MAINLOOP_ERROR; + } + + INFO("Waiting for thermal events ...\n"); + + if (mainloop(-1)) { + ERROR("Mainloop failed\n"); + return THERMAL_ENGINE_MAINLOOP_ERROR; + } + + return THERMAL_ENGINE_SUCCESS; +} diff --git a/tools/thermal/thermometer/Build b/tools/thermal/thermometer/Build new file mode 100644 index 000000000000..1b96c159c3c8 --- /dev/null +++ b/tools/thermal/thermometer/Build @@ -0,0 +1 @@ +thermometer-y += thermometer.o diff --git a/tools/thermal/thermometer/Makefile b/tools/thermal/thermometer/Makefile new file mode 100644 index 000000000000..d8f8bc82fe3b --- /dev/null +++ b/tools/thermal/thermometer/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for cgroup tools + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +CFLAGS = -Wall -Wextra +CFLAGS += -I$(srctree)/tools/thermal/lib + +LDFLAGS = -L$(srctree)/tools/thermal/lib +LDFLAGS += -lthermal_tools +LDFLAGS += -lconfig + +VERSION = 0.0.1 +TARGET=thermometer + +all: $(TARGET) +%: %.c + $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS) + +clean: + $(RM) $(TARGET) diff --git a/tools/thermal/thermometer/thermometer.8 b/tools/thermal/thermometer/thermometer.8 new file mode 100644 index 000000000000..d090fbca4cba --- /dev/null +++ b/tools/thermal/thermometer/thermometer.8 @@ -0,0 +1,92 @@ +.TH THERMOMETER 8 +# SPDX-License-Identifier: GPL-2.0 +.SH NAME +\fBthermometer\fP - A thermal profiling tool + +.SH SYNOPSIS +.ft B +.B thermometer +.RB [ options ] +.RB [ command ] +.br +.SH DESCRIPTION +\fBthermometer \fP captures the thermal zones temperature at a +specified sampling period. It is optimized to reduce as much as +possible the overhead while doing the temperature acquisition in order +to prevent disrupting the running application we may want to profile. + +This low overhead also allows a high rate sampling for the temperature +which could be necessary to spot overshots and undershots. + +If no configuration file is specified, then all the thermal zones will +be monitored at 4Hz, so every 250ms. A configuration file specifies +the thermal zone names and the desired sampling period. A thermal zone +name can be a regular expression to specify a group of thermal zone. + +The sampling of the different thermal zones will be written into +separate files with the thermal zone name. It is possible to specify a +postfix to identify them for example for a specific scenario. The +output directory can be specified in addition. + +Without any parameters, \fBthermometer \fP captures all the thermal +zone temperatures every 250ms and write to the current directory the +captured files postfixed with the current date. + +If a running \fBduration\fP is specified or a \fBcommand\fP, the +capture ends at the end of the duration if the command did not +finished before. The \fBduration\fP can be specified alone as well as +the \fBcommand\fP. If none is specified, the capture will continue +indefinitively until interrupted by \fBSIGINT\fP or \fBSIGQUIT\fP. +.PP + +.SS Options +.PP +The \fB-h, --help\fP option shows a short usage help +.PP +The \fB-o <dir>, --output <dir>\fP option defines the output directory to put the +sampling files +.PP +The \fB-c <config>, --config <config>\fP option specifies the configuration file to use +.PP +The \fB-d <seconds>, --duration <seconds>\fP option specifies the duration of the capture +.PP +The \fB-l <loglevel>, --loglevel <loglevel>\fP option sets the loglevel [DEBUG,INFO,NOTICE,WARN,ERROR] +.PP +The \fB-p <string>, --postfix <string>\fP option appends \fBstring\fP at the end of the capture filenames +.PP +The \fB-s, --syslog\fP option sets the output to syslog, default is \fBstdout\fP +.PP +The \fB-w, --overwrite\fP overwrites the output files if they exist +.PP + +.PP + +.SS "Exit status:" +.TP +0 +if OK, +.TP +1 +Error with the options specified as parameters +.TP +2 +Error when configuring the logging facility +.TP +3 +Error when configuring the time +.TP +4 +Error in the initialization routine +.TP +5 +Error during the runtime + +.SH Capture file format + +Every file contains two columns. The first one is the uptime timestamp +in order to find a point in time since the system started up if there +is any thermal event. The second one is the temperature in milli +degree. The first line contains the label of each column. + +.SH AUTHOR +Daniel Lezcano <daniel.lezcano@kernel.org> diff --git a/tools/thermal/thermometer/thermometer.c b/tools/thermal/thermometer/thermometer.c new file mode 100644 index 000000000000..1a87a0a77f9f --- /dev/null +++ b/tools/thermal/thermometer/thermometer.c @@ -0,0 +1,572 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define _GNU_SOURCE +#include <dirent.h> +#include <fcntl.h> +#include <getopt.h> +#include <regex.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/signalfd.h> +#include <sys/timerfd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <linux/thermal.h> + +#include <libconfig.h> +#include "thermal-tools.h" + +#define CLASS_THERMAL "/sys/class/thermal" + +enum { + THERMOMETER_SUCCESS = 0, + THERMOMETER_OPTION_ERROR, + THERMOMETER_LOG_ERROR, + THERMOMETER_CONFIG_ERROR, + THERMOMETER_TIME_ERROR, + THERMOMETER_INIT_ERROR, + THERMOMETER_RUNTIME_ERROR +}; + +struct options { + int loglvl; + int logopt; + int overwrite; + int duration; + const char *config; + char postfix[PATH_MAX]; + char output[PATH_MAX]; +}; + +struct tz_regex { + regex_t regex; + int polling; +}; + +struct configuration { + struct tz_regex *tz_regex; + int nr_tz_regex; + +}; + +struct tz { + FILE *file_out; + int fd_temp; + int fd_timer; + int polling; + const char *name; +}; + +struct thermometer { + struct tz *tz; + int nr_tz; +}; + +static struct tz_regex *configuration_tz_match(const char *expr, + struct configuration *config) +{ + int i; + + for (i = 0; i < config->nr_tz_regex; i++) { + + if (!regexec(&config->tz_regex[i].regex, expr, 0, NULL, 0)) + return &config->tz_regex[i]; + } + + return NULL; +} + +static int configuration_default_init(struct configuration *config) +{ + config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) * + (config->nr_tz_regex + 1)); + + if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, ".*", + REG_NOSUB | REG_EXTENDED)) { + ERROR("Invalid regular expression\n"); + return -1; + } + + config->tz_regex[config->nr_tz_regex].polling = 250; + config->nr_tz_regex = 1; + + return 0; +} + +static int configuration_init(const char *path, struct configuration *config) +{ + config_t cfg; + + config_setting_t *tz; + int i, length; + + if (path && access(path, F_OK)) { + ERROR("'%s' is not accessible\n", path); + return -1; + } + + if (!path && !config->nr_tz_regex) { + INFO("No thermal zones configured, using wildcard for all of them\n"); + return configuration_default_init(config); + } + + config_init(&cfg); + + if (!config_read_file(&cfg, path)) { + ERROR("Failed to parse %s:%d - %s\n", config_error_file(&cfg), + config_error_line(&cfg), config_error_text(&cfg)); + + return -1; + } + + tz = config_lookup(&cfg, "thermal-zones"); + if (!tz) { + ERROR("No thermal zone configured to be monitored\n"); + return -1; + } + + length = config_setting_length(tz); + + INFO("Found %d thermal zone(s) regular expression\n", length); + + for (i = 0; i < length; i++) { + + config_setting_t *node; + const char *name; + int polling; + + node = config_setting_get_elem(tz, i); + if (!node) { + ERROR("Missing node name '%d'\n", i); + return -1; + } + + if (!config_setting_lookup_string(node, "name", &name)) { + ERROR("Thermal zone name not found\n"); + return -1; + } + + if (!config_setting_lookup_int(node, "polling", &polling)) { + ERROR("Polling value not found"); + return -1; + } + + config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) * + (config->nr_tz_regex + 1)); + + if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, name, + REG_NOSUB | REG_EXTENDED)) { + ERROR("Invalid regular expression '%s'\n", name); + continue; + } + + config->tz_regex[config->nr_tz_regex].polling = polling; + config->nr_tz_regex++; + + INFO("Thermal zone regular expression '%s' with polling %d\n", + name, polling); + } + + return 0; +} + +static void usage(const char *cmd) +{ + printf("%s Version: %s\n", cmd, VERSION); + printf("Usage: %s [options]\n", cmd); + printf("\t-h, --help\t\tthis help\n"); + printf("\t-o, --output <dir>\toutput directory for temperature capture\n"); + printf("\t-c, --config <file>\tconfiguration file\n"); + printf("\t-d, --duration <seconds>\tcapture duration\n"); + printf("\t-l, --loglevel <level>\tlog level: "); + printf("DEBUG, INFO, NOTICE, WARN, ERROR\n"); + printf("\t-p, --postfix <string>\tpostfix to be happened at the end of the files\n"); + printf("\t-s, --syslog\t\toutput to syslog\n"); + printf("\t-w, --overwrite\t\toverwrite the temperature capture files if they exist\n"); + printf("\n"); + exit(0); +} + +static int options_init(int argc, char *argv[], struct options *options) +{ + int opt; + time_t now = time(NULL); + + struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "config", required_argument, NULL, 'c' }, + { "duration", required_argument, NULL, 'd' }, + { "loglevel", required_argument, NULL, 'l' }, + { "postfix", required_argument, NULL, 'p' }, + { "output", required_argument, NULL, 'o' }, + { "syslog", required_argument, NULL, 's' }, + { "overwrite", no_argument, NULL, 'w' }, + { 0, 0, 0, 0 } + }; + + strftime(options->postfix, sizeof(options->postfix), + "-%Y-%m-%d_%H:%M:%S", gmtime(&now)); + + while (1) { + + int optindex = 0; + + opt = getopt_long(argc, argv, "ho:c:d:l:p:sw", long_options, &optindex); + if (opt == -1) + break; + + switch (opt) { + case 'c': + options->config = optarg; + break; + case 'd': + options->duration = atoi(optarg) * 1000; + break; + case 'l': + options->loglvl = log_str2level(optarg); + break; + case 'h': + usage(basename(argv[0])); + break; + case 'p': + strcpy(options->postfix, optarg); + break; + case 'o': + strcpy(options->output, optarg); + break; + case 's': + options->logopt = TO_SYSLOG; + break; + case 'w': + options->overwrite = 1; + break; + default: /* '?' */ + ERROR("Usage: %s --help\n", argv[0]); + return -1; + } + } + + return 0; +} + +static int thermometer_add_tz(const char *path, const char *name, int polling, + struct thermometer *thermometer) +{ + int fd; + char tz_path[PATH_MAX]; + + sprintf(tz_path, CLASS_THERMAL"/%s/temp", path); + + fd = open(tz_path, O_RDONLY); + if (fd < 0) { + ERROR("Failed to open '%s': %m\n", tz_path); + return -1; + } + + thermometer->tz = realloc(thermometer->tz, + sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); + if (!thermometer->tz) { + ERROR("Failed to allocate thermometer->tz\n"); + return -1; + } + + thermometer->tz[thermometer->nr_tz].fd_temp = fd; + thermometer->tz[thermometer->nr_tz].name = strdup(name); + thermometer->tz[thermometer->nr_tz].polling = polling; + thermometer->nr_tz++; + + INFO("Added thermal zone '%s->%s (polling:%d)'\n", path, name, polling); + + return 0; +} + +static int thermometer_init(struct configuration *config, + struct thermometer *thermometer) +{ + DIR *dir; + struct dirent *dirent; + struct tz_regex *tz_regex; + const char *tz_dirname = "thermal_zone"; + + if (mainloop_init()) { + ERROR("Failed to start mainloop\n"); + return -1; + } + + dir = opendir(CLASS_THERMAL); + if (!dir) { + ERROR("failed to open '%s'\n", CLASS_THERMAL); + return -1; + } + + while ((dirent = readdir(dir))) { + char tz_type[THERMAL_NAME_LENGTH]; + char tz_path[PATH_MAX]; + FILE *tz_file; + + if (strncmp(dirent->d_name, tz_dirname, strlen(tz_dirname))) + continue; + + sprintf(tz_path, CLASS_THERMAL"/%s/type", dirent->d_name); + + tz_file = fopen(tz_path, "r"); + if (!tz_file) { + ERROR("Failed to open '%s': %m", tz_path); + continue; + } + + fscanf(tz_file, "%s", tz_type); + + fclose(tz_file); + + tz_regex = configuration_tz_match(tz_type, config); + if (!tz_regex) + continue; + + if (thermometer_add_tz(dirent->d_name, tz_type, + tz_regex->polling, thermometer)) + continue; + } + + closedir(dir); + + return 0; +} + +static int timer_temperature_callback(int fd, void *arg) +{ + struct tz *tz = arg; + char buf[16] = { 0 }; + + pread(tz->fd_temp, buf, sizeof(buf), 0); + + fprintf(tz->file_out, "%ld %s", getuptimeofday_ms(), buf); + + read(fd, buf, sizeof(buf)); + + return 0; +} + +static int thermometer_start(struct thermometer *thermometer, + struct options *options) +{ + struct itimerspec timer_it = { 0 }; + char *path; + FILE *f; + int i; + + INFO("Capturing %d thermal zone(s) temperature...\n", thermometer->nr_tz); + + if (access(options->output, F_OK) && mkdir(options->output, 0700)) { + ERROR("Failed to create directory '%s'\n", options->output); + return -1; + } + + for (i = 0; i < thermometer->nr_tz; i++) { + + asprintf(&path, "%s/%s%s", options->output, + thermometer->tz[i].name, options->postfix); + + if (!options->overwrite && !access(path, F_OK)) { + ERROR("'%s' already exists\n", path); + return -1; + } + + f = fopen(path, "w"); + if (!f) { + ERROR("Failed to create '%s':%m\n", path); + return -1; + } + + fprintf(f, "timestamp(ms) %s(°mC)\n", thermometer->tz[i].name); + + thermometer->tz[i].file_out = f; + + DEBUG("Created '%s' file for thermal zone '%s'\n", path, thermometer->tz[i].name); + + /* + * Create polling timer + */ + thermometer->tz[i].fd_timer = timerfd_create(CLOCK_MONOTONIC, 0); + if (thermometer->tz[i].fd_timer < 0) { + ERROR("Failed to create timer for '%s': %m\n", + thermometer->tz[i].name); + return -1; + } + + DEBUG("Watching '%s' every %d ms\n", + thermometer->tz[i].name, thermometer->tz[i].polling); + + timer_it.it_interval = timer_it.it_value = + msec_to_timespec(thermometer->tz[i].polling); + + if (timerfd_settime(thermometer->tz[i].fd_timer, 0, + &timer_it, NULL) < 0) + return -1; + + if (mainloop_add(thermometer->tz[i].fd_timer, + timer_temperature_callback, + &thermometer->tz[i])) + return -1; + } + + return 0; +} + +static int thermometer_execute(int argc, char *argv[], char *const envp[], pid_t *pid) +{ + if (!argc) + return 0; + + *pid = fork(); + if (*pid < 0) { + ERROR("Failed to fork process: %m"); + return -1; + } + + if (!(*pid)) { + execvpe(argv[0], argv, envp); + exit(1); + } + + return 0; +} + +static int kill_process(__maybe_unused int fd, void *arg) +{ + pid_t pid = *(pid_t *)arg; + + if (kill(pid, SIGTERM)) + ERROR("Failed to send SIGTERM signal to '%d': %p\n", pid); + else if (waitpid(pid, NULL, 0)) + ERROR("Failed to wait pid '%d': %p\n", pid); + + mainloop_exit(); + + return 0; +} + +static int exit_mainloop(__maybe_unused int fd, __maybe_unused void *arg) +{ + mainloop_exit(); + return 0; +} + +static int thermometer_wait(struct options *options, pid_t pid) +{ + int fd; + sigset_t mask; + + /* + * If there is a duration specified, we will exit the mainloop + * and gracefully close all the files which will flush the + * file system cache + */ + if (options->duration) { + struct itimerspec timer_it = { 0 }; + + timer_it.it_value = msec_to_timespec(options->duration); + + fd = timerfd_create(CLOCK_MONOTONIC, 0); + if (fd < 0) { + ERROR("Failed to create duration timer: %m\n"); + return -1; + } + + if (timerfd_settime(fd, 0, &timer_it, NULL)) { + ERROR("Failed to set timer time: %m\n"); + return -1; + } + + if (mainloop_add(fd, pid < 0 ? exit_mainloop : kill_process, &pid)) { + ERROR("Failed to set timer exit mainloop callback\n"); + return -1; + } + } + + /* + * We want to catch any keyboard interrupt, as well as child + * signals if any in order to exit properly + */ + sigemptyset(&mask); + sigaddset(&mask, SIGINT); + sigaddset(&mask, SIGQUIT); + sigaddset(&mask, SIGCHLD); + + if (sigprocmask(SIG_BLOCK, &mask, NULL)) { + ERROR("Failed to set sigprocmask: %m\n"); + return -1; + } + + fd = signalfd(-1, &mask, 0); + if (fd < 0) { + ERROR("Failed to set the signalfd: %m\n"); + return -1; + } + + if (mainloop_add(fd, exit_mainloop, NULL)) { + ERROR("Failed to set timer exit mainloop callback\n"); + return -1; + } + + return mainloop(-1); +} + +static int thermometer_stop(struct thermometer *thermometer) +{ + int i; + + INFO("Closing/flushing output files\n"); + + for (i = 0; i < thermometer->nr_tz; i++) + fclose(thermometer->tz[i].file_out); + + return 0; +} + +int main(int argc, char *argv[], char *const envp[]) +{ + struct options options = { + .loglvl = LOG_DEBUG, + .logopt = TO_STDOUT, + .output = ".", + }; + struct configuration config = { 0 }; + struct thermometer thermometer = { 0 }; + + pid_t pid = -1; + + if (options_init(argc, argv, &options)) + return THERMOMETER_OPTION_ERROR; + + if (log_init(options.loglvl, argv[0], options.logopt)) + return THERMOMETER_LOG_ERROR; + + if (configuration_init(options.config, &config)) + return THERMOMETER_CONFIG_ERROR; + + if (uptimeofday_init()) + return THERMOMETER_TIME_ERROR; + + if (thermometer_init(&config, &thermometer)) + return THERMOMETER_INIT_ERROR; + + if (thermometer_start(&thermometer, &options)) + return THERMOMETER_RUNTIME_ERROR; + + if (thermometer_execute(argc - optind, &argv[optind], envp, &pid)) + return THERMOMETER_RUNTIME_ERROR; + + if (thermometer_wait(&options, pid)) + return THERMOMETER_RUNTIME_ERROR; + + if (thermometer_stop(&thermometer)) + return THERMOMETER_RUNTIME_ERROR; + + return THERMOMETER_SUCCESS; +} diff --git a/tools/thermal/thermometer/thermometer.conf b/tools/thermal/thermometer/thermometer.conf new file mode 100644 index 000000000000..02c6dab3b1b3 --- /dev/null +++ b/tools/thermal/thermometer/thermometer.conf @@ -0,0 +1,5 @@ + +thermal-zones = ( + { name = "cpu[0-9]-thermal"; + polling = 100; } + ) |