diff options
Diffstat (limited to 'tools')
73 files changed, 4211 insertions, 76 deletions
diff --git a/tools/Makefile b/tools/Makefile index 77f1aee8ea01..3dfd72ae6c1a 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -12,6 +12,7 @@ help: @echo ' acpi - ACPI tools' @echo ' cgroup - cgroup tools' @echo ' cpupower - a tool for all things x86 CPU power' + @echo ' debugging - tools for debugging' @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' @echo ' firmware - Firmware tools' @echo ' freefall - laptop accelerometer program for disk protection' @@ -61,7 +62,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware: FORCE +cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE $(call descend,$@) liblockdep: FORCE @@ -96,7 +97,8 @@ kvm_stat: FORCE all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ - tmon freefall iio objtool kvm_stat wmi pci + tmon freefall iio objtool kvm_stat wmi \ + pci debugging acpi_install: $(call descend,power/$(@:_install=),install) @@ -104,7 +106,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -130,7 +132,7 @@ install: acpi_install cgroup_install cpupower_install gpio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ - wmi_install pci_install + wmi_install pci_install debugging_install acpi_clean: $(call descend,power/acpi,clean) @@ -138,7 +140,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean: +cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -176,6 +178,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ - gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean + gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean .PHONY: FORCE diff --git a/tools/debugging/Makefile b/tools/debugging/Makefile new file mode 100644 index 000000000000..e2b7c1a6fb8f --- /dev/null +++ b/tools/debugging/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for debugging tools + +PREFIX ?= /usr +BINDIR ?= bin +INSTALL ?= install + +TARGET = kernel-chktaint + +all: $(TARGET) + +clean: + +install: kernel-chktaint + $(INSTALL) -D -m 755 $(TARGET) $(DESTDIR)$(PREFIX)/$(BINDIR)/$(TARGET) + diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint new file mode 100755 index 000000000000..2240cb56e6e5 --- /dev/null +++ b/tools/debugging/kernel-chktaint @@ -0,0 +1,202 @@ +#! /bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Randy Dunlap <rdunlap@infradead.org>, 2018 +# Thorsten Leemhuis <linux@leemhuis.info>, 2018 + +usage() +{ + cat <<EOF +usage: ${0##*/} + ${0##*/} <int> + +Call without parameters to decode /proc/sys/kernel/tainted. + +Call with a positive integer as parameter to decode a value you +retrieved from /proc/sys/kernel/tainted on another system. + +EOF +} + +if [ "$1"x != "x" ]; then + if [ "$1"x == "--helpx" ] || [ "$1"x == "-hx" ] ; then + usage + exit 1 + elif [ $1 -ge 0 ] 2>/dev/null ; then + taint=$1 + else + echo "Error: Parameter '$1' not a positive interger. Aborting." >&2 + exit 1 + fi +else + TAINTFILE="/proc/sys/kernel/tainted" + if [ ! -r $TAINTFILE ]; then + echo "No file: $TAINTFILE" + exit + fi + + taint=`cat $TAINTFILE` +fi + +if [ $taint -eq 0 ]; then + echo "Kernel not Tainted" + exit +else + echo "Kernel is \"tainted\" for the following reasons:" +fi + +T=$taint +out= + +addout() { + out=$out$1 +} + +if [ `expr $T % 2` -eq 0 ]; then + addout "G" +else + addout "P" + echo " * proprietary module was loaded (#0)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "F" + echo " * module was force loaded (#1)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "S" + echo " * SMP kernel oops on an officially SMP incapable processor (#2)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "R" + echo " * module was force unloaded (#3)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "M" + echo " * processor reported a Machine Check Exception (MCE) (#4)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "B" + echo " * bad page referenced or some unexpected page flags (#5)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "U" + echo " * taint requested by userspace application (#6)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "D" + echo " * kernel died recently, i.e. there was an OOPS or BUG (#7)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "A" + echo " * an ACPI table was overridden by user (#8)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "W" + echo " * kernel issued warning (#9)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "C" + echo " * staging driver was loaded (#10)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "I" + echo " * workaround for bug in platform firmware applied (#11)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "O" + echo " * externally-built ('out-of-tree') module was loaded (#12)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "E" + echo " * unsigned module was loaded (#13)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "L" + echo " * soft lockup occurred (#14)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "K" + echo " * kernel has been live patched (#15)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "X" + echo " * auxiliary taint, defined for and used by distros (#16)" + +fi +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "T" + echo " * kernel was built with the struct randomization plugin (#17)" +fi + +echo "For a more detailed explanation of the various taint flags see" +echo " Documentation/admin-guide/tainted-kernels.rst in the the Linux kernel sources" +echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html" +echo "Raw taint value as int/string: $taint/'$out'" +#EOF# diff --git a/tools/firmware/ihex2fw.c b/tools/firmware/ihex2fw.c index b58dd061e978..8925b60e51f5 100644 --- a/tools/firmware/ihex2fw.c +++ b/tools/firmware/ihex2fw.c @@ -24,6 +24,10 @@ #include <getopt.h> +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) + struct ihex_binrec { struct ihex_binrec *next; /* not part of the real data structure */ uint32_t addr; @@ -131,6 +135,7 @@ int main(int argc, char **argv) static int process_ihex(uint8_t *data, ssize_t size) { struct ihex_binrec *record; + size_t record_size; uint32_t offset = 0; uint32_t data32; uint8_t type, crc = 0, crcbyte = 0; @@ -157,12 +162,13 @@ next_record: len <<= 8; len += hex(data + i, &crc); i += 2; } - record = malloc((sizeof (*record) + len + 3) & ~3); + record_size = ALIGN(sizeof(*record) + len, 4); + record = malloc(record_size); if (!record) { fprintf(stderr, "out of memory for records\n"); return -ENOMEM; } - memset(record, 0, (sizeof(*record) + len + 3) & ~3); + memset(record, 0, record_size); record->len = len; /* now check if we have enough data to read everything */ @@ -259,13 +265,18 @@ static void file_record(struct ihex_binrec *record) *p = record; } +static uint16_t ihex_binrec_size(struct ihex_binrec *p) +{ + return p->len + sizeof(p->addr) + sizeof(p->len); +} + static int output_records(int outfd) { unsigned char zeroes[6] = {0, 0, 0, 0, 0, 0}; struct ihex_binrec *p = records; while (p) { - uint16_t writelen = (p->len + 9) & ~3; + uint16_t writelen = ALIGN(ihex_binrec_size(p), 4); p->addr = htonl(p->addr); p->len = htons(p->len); diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index ac2de6b7e89f..7bf9bde28bcc 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -60,6 +60,7 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_GRAVITY] = "gravity", [IIO_POSITIONRELATIVE] = "positionrelative", [IIO_PHASE] = "phase", + [IIO_MASSCONCENTRATION] = "massconcentration", }; static const char * const iio_ev_type_text[] = { @@ -114,7 +115,13 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_I] = "i", [IIO_MOD_Q] = "q", [IIO_MOD_CO2] = "co2", + [IIO_MOD_ETHANOL] = "ethanol", + [IIO_MOD_H2] = "h2", [IIO_MOD_VOC] = "voc", + [IIO_MOD_PM1] = "pm1", + [IIO_MOD_PM2P5] = "pm2p5", + [IIO_MOD_PM4] = "pm4", + [IIO_MOD_PM10] = "pm10", }; static bool event_is_known(struct iio_event_data *event) @@ -156,6 +163,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_GRAVITY: case IIO_POSITIONRELATIVE: case IIO_PHASE: + case IIO_MASSCONCENTRATION: break; default: return false; @@ -199,7 +207,13 @@ static bool event_is_known(struct iio_event_data *event) case IIO_MOD_I: case IIO_MOD_Q: case IIO_MOD_CO2: + case IIO_MOD_ETHANOL: + case IIO_MOD_H2: case IIO_MOD_VOC: + case IIO_MOD_PM1: + case IIO_MOD_PM2P5: + case IIO_MOD_PM4: + case IIO_MOD_PM10: break; default: return false; diff --git a/tools/include/linux/numa.h b/tools/include/linux/numa.h new file mode 100644 index 000000000000..110b0e5d0fb0 --- /dev/null +++ b/tools/include/linux/numa.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NUMA_H +#define _LINUX_NUMA_H + + +#ifdef CONFIG_NODES_SHIFT +#define NODES_SHIFT CONFIG_NODES_SHIFT +#else +#define NODES_SHIFT 0 +#endif + +#define MAX_NUMNODES (1 << NODES_SHIFT) + +#define NUMA_NO_NODE (-1) + +#endif /* _LINUX_NUMA_H */ diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h index f189931042a7..45fcbf99d72e 100644 --- a/tools/include/uapi/linux/lirc.h +++ b/tools/include/uapi/linux/lirc.h @@ -134,6 +134,12 @@ #define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32) /* + * Return the recording timeout, which is either set by + * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols. + */ +#define LIRC_GET_REC_TIMEOUT _IOR('i', 0x00000024, __u32) + +/* * struct lirc_scancode - decoded scancode with protocol for use with * LIRC_MODE_SCANCODE * @@ -186,6 +192,9 @@ struct lirc_scancode { * @RC_PROTO_XMP: XMP protocol * @RC_PROTO_CEC: CEC protocol * @RC_PROTO_IMON: iMon Pad protocol + * @RC_PROTO_RCMM12: RC-MM protocol 12 bits + * @RC_PROTO_RCMM24: RC-MM protocol 24 bits + * @RC_PROTO_RCMM32: RC-MM protocol 32 bits */ enum rc_proto { RC_PROTO_UNKNOWN = 0, @@ -212,6 +221,9 @@ enum rc_proto { RC_PROTO_XMP = 21, RC_PROTO_CEC = 22, RC_PROTO_IMON = 23, + RC_PROTO_RCMM12 = 24, + RC_PROTO_RCMM24 = 25, + RC_PROTO_RCMM32 = 26, }; #endif diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile new file mode 100644 index 000000000000..f79522fc37b5 --- /dev/null +++ b/tools/io_uring/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for io_uring test tools +CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE +LDLIBS += -lpthread + +all: io_uring-cp io_uring-bench +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +io_uring-bench: syscall.o io_uring-bench.o + $(CC) $(CFLAGS) $(LDLIBS) -o $@ $^ + +io_uring-cp: setup.o syscall.o queue.o + +clean: + $(RM) io_uring-cp io_uring-bench *.o + +.PHONY: all clean diff --git a/tools/io_uring/README b/tools/io_uring/README new file mode 100644 index 000000000000..67fd70115cff --- /dev/null +++ b/tools/io_uring/README @@ -0,0 +1,29 @@ +This directory includes a few programs that demonstrate how to use io_uring +in an application. The examples are: + +io_uring-cp + A very basic io_uring implementation of cp(1). It takes two + arguments, copies the first argument to the second. This example + is part of liburing, and hence uses the simplified liburing API + for setting up an io_uring instance, submitting IO, completing IO, + etc. The support functions in queue.c and setup.c are straight + out of liburing. + +io_uring-bench + Benchmark program that does random reads on a number of files. This + app demonstrates the various features of io_uring, like fixed files, + fixed buffers, and polled IO. There are options in the program to + control which features to use. Arguments is the file (or files) that + io_uring-bench should operate on. This uses the raw io_uring + interface. + +liburing can be cloned with git here: + + git://git.kernel.dk/liburing + +and contains a number of unit tests as well for testing io_uring. It also +comes with man pages for the three system calls. + +Fio includes an io_uring engine, you can clone fio here: + + git://git.kernel.dk/fio diff --git a/tools/io_uring/barrier.h b/tools/io_uring/barrier.h new file mode 100644 index 000000000000..ef00f6722ba9 --- /dev/null +++ b/tools/io_uring/barrier.h @@ -0,0 +1,16 @@ +#ifndef LIBURING_BARRIER_H +#define LIBURING_BARRIER_H + +#if defined(__x86_64) || defined(__i386__) +#define read_barrier() __asm__ __volatile__("":::"memory") +#define write_barrier() __asm__ __volatile__("":::"memory") +#else +/* + * Add arch appropriate definitions. Be safe and use full barriers for + * archs we don't have support for. + */ +#define read_barrier() __sync_synchronize() +#define write_barrier() __sync_synchronize() +#endif + +#endif diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c new file mode 100644 index 000000000000..512306a37531 --- /dev/null +++ b/tools/io_uring/io_uring-bench.c @@ -0,0 +1,616 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple benchmark program that uses the various features of io_uring + * to provide fast random access to a device/file. It has various + * options that are control how we use io_uring, see the OPTIONS section + * below. This uses the raw io_uring interface. + * + * Copyright (C) 2018-2019 Jens Axboe + */ +#include <stdio.h> +#include <errno.h> +#include <assert.h> +#include <stdlib.h> +#include <stddef.h> +#include <signal.h> +#include <inttypes.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <sys/resource.h> +#include <sys/mman.h> +#include <sys/uio.h> +#include <linux/fs.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <pthread.h> +#include <sched.h> + +#include "liburing.h" +#include "barrier.h" + +#ifndef IOCQE_FLAG_CACHEHIT +#define IOCQE_FLAG_CACHEHIT (1U << 0) +#endif + +#define min(a, b) ((a < b) ? (a) : (b)) + +struct io_sq_ring { + unsigned *head; + unsigned *tail; + unsigned *ring_mask; + unsigned *ring_entries; + unsigned *flags; + unsigned *array; +}; + +struct io_cq_ring { + unsigned *head; + unsigned *tail; + unsigned *ring_mask; + unsigned *ring_entries; + struct io_uring_cqe *cqes; +}; + +#define DEPTH 128 + +#define BATCH_SUBMIT 32 +#define BATCH_COMPLETE 32 + +#define BS 4096 + +#define MAX_FDS 16 + +static unsigned sq_ring_mask, cq_ring_mask; + +struct file { + unsigned long max_blocks; + unsigned pending_ios; + int real_fd; + int fixed_fd; +}; + +struct submitter { + pthread_t thread; + int ring_fd; + struct drand48_data rand; + struct io_sq_ring sq_ring; + struct io_uring_sqe *sqes; + struct iovec iovecs[DEPTH]; + struct io_cq_ring cq_ring; + int inflight; + unsigned long reaps; + unsigned long done; + unsigned long calls; + unsigned long cachehit, cachemiss; + volatile int finish; + + __s32 *fds; + + struct file files[MAX_FDS]; + unsigned nr_files; + unsigned cur_file; +}; + +static struct submitter submitters[1]; +static volatile int finish; + +/* + * OPTIONS: Set these to test the various features of io_uring. + */ +static int polled = 1; /* use IO polling */ +static int fixedbufs = 1; /* use fixed user buffers */ +static int register_files = 1; /* use fixed files */ +static int buffered = 0; /* use buffered IO, not O_DIRECT */ +static int sq_thread_poll = 0; /* use kernel submission/poller thread */ +static int sq_thread_cpu = -1; /* pin above thread to this CPU */ +static int do_nop = 0; /* no-op SQ ring commands */ + +static int io_uring_register_buffers(struct submitter *s) +{ + if (do_nop) + return 0; + + return io_uring_register(s->ring_fd, IORING_REGISTER_BUFFERS, s->iovecs, + DEPTH); +} + +static int io_uring_register_files(struct submitter *s) +{ + unsigned i; + + if (do_nop) + return 0; + + s->fds = calloc(s->nr_files, sizeof(__s32)); + for (i = 0; i < s->nr_files; i++) { + s->fds[i] = s->files[i].real_fd; + s->files[i].fixed_fd = i; + } + + return io_uring_register(s->ring_fd, IORING_REGISTER_FILES, s->fds, + s->nr_files); +} + +static int gettid(void) +{ + return syscall(__NR_gettid); +} + +static unsigned file_depth(struct submitter *s) +{ + return (DEPTH + s->nr_files - 1) / s->nr_files; +} + +static void init_io(struct submitter *s, unsigned index) +{ + struct io_uring_sqe *sqe = &s->sqes[index]; + unsigned long offset; + struct file *f; + long r; + + if (do_nop) { + sqe->opcode = IORING_OP_NOP; + return; + } + + if (s->nr_files == 1) { + f = &s->files[0]; + } else { + f = &s->files[s->cur_file]; + if (f->pending_ios >= file_depth(s)) { + s->cur_file++; + if (s->cur_file == s->nr_files) + s->cur_file = 0; + f = &s->files[s->cur_file]; + } + } + f->pending_ios++; + + lrand48_r(&s->rand, &r); + offset = (r % (f->max_blocks - 1)) * BS; + + if (register_files) { + sqe->flags = IOSQE_FIXED_FILE; + sqe->fd = f->fixed_fd; + } else { + sqe->flags = 0; + sqe->fd = f->real_fd; + } + if (fixedbufs) { + sqe->opcode = IORING_OP_READ_FIXED; + sqe->addr = (unsigned long) s->iovecs[index].iov_base; + sqe->len = BS; + sqe->buf_index = index; + } else { + sqe->opcode = IORING_OP_READV; + sqe->addr = (unsigned long) &s->iovecs[index]; + sqe->len = 1; + sqe->buf_index = 0; + } + sqe->ioprio = 0; + sqe->off = offset; + sqe->user_data = (unsigned long) f; +} + +static int prep_more_ios(struct submitter *s, unsigned max_ios) +{ + struct io_sq_ring *ring = &s->sq_ring; + unsigned index, tail, next_tail, prepped = 0; + + next_tail = tail = *ring->tail; + do { + next_tail++; + read_barrier(); + if (next_tail == *ring->head) + break; + + index = tail & sq_ring_mask; + init_io(s, index); + ring->array[index] = index; + prepped++; + tail = next_tail; + } while (prepped < max_ios); + + if (*ring->tail != tail) { + /* order tail store with writes to sqes above */ + write_barrier(); + *ring->tail = tail; + write_barrier(); + } + return prepped; +} + +static int get_file_size(struct file *f) +{ + struct stat st; + + if (fstat(f->real_fd, &st) < 0) + return -1; + if (S_ISBLK(st.st_mode)) { + unsigned long long bytes; + + if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0) + return -1; + + f->max_blocks = bytes / BS; + return 0; + } else if (S_ISREG(st.st_mode)) { + f->max_blocks = st.st_size / BS; + return 0; + } + + return -1; +} + +static int reap_events(struct submitter *s) +{ + struct io_cq_ring *ring = &s->cq_ring; + struct io_uring_cqe *cqe; + unsigned head, reaped = 0; + + head = *ring->head; + do { + struct file *f; + + read_barrier(); + if (head == *ring->tail) + break; + cqe = &ring->cqes[head & cq_ring_mask]; + if (!do_nop) { + f = (struct file *) (uintptr_t) cqe->user_data; + f->pending_ios--; + if (cqe->res != BS) { + printf("io: unexpected ret=%d\n", cqe->res); + if (polled && cqe->res == -EOPNOTSUPP) + printf("Your filesystem doesn't support poll\n"); + return -1; + } + } + if (cqe->flags & IOCQE_FLAG_CACHEHIT) + s->cachehit++; + else + s->cachemiss++; + reaped++; + head++; + } while (1); + + s->inflight -= reaped; + *ring->head = head; + write_barrier(); + return reaped; +} + +static void *submitter_fn(void *data) +{ + struct submitter *s = data; + struct io_sq_ring *ring = &s->sq_ring; + int ret, prepped; + + printf("submitter=%d\n", gettid()); + + srand48_r(pthread_self(), &s->rand); + + prepped = 0; + do { + int to_wait, to_submit, this_reap, to_prep; + + if (!prepped && s->inflight < DEPTH) { + to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT); + prepped = prep_more_ios(s, to_prep); + } + s->inflight += prepped; +submit_more: + to_submit = prepped; +submit: + if (to_submit && (s->inflight + to_submit <= DEPTH)) + to_wait = 0; + else + to_wait = min(s->inflight + to_submit, BATCH_COMPLETE); + + /* + * Only need to call io_uring_enter if we're not using SQ thread + * poll, or if IORING_SQ_NEED_WAKEUP is set. + */ + if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) { + unsigned flags = 0; + + if (to_wait) + flags = IORING_ENTER_GETEVENTS; + if ((*ring->flags & IORING_SQ_NEED_WAKEUP)) + flags |= IORING_ENTER_SQ_WAKEUP; + ret = io_uring_enter(s->ring_fd, to_submit, to_wait, + flags, NULL); + s->calls++; + } + + /* + * For non SQ thread poll, we already got the events we needed + * through the io_uring_enter() above. For SQ thread poll, we + * need to loop here until we find enough events. + */ + this_reap = 0; + do { + int r; + r = reap_events(s); + if (r == -1) { + s->finish = 1; + break; + } else if (r > 0) + this_reap += r; + } while (sq_thread_poll && this_reap < to_wait); + s->reaps += this_reap; + + if (ret >= 0) { + if (!ret) { + to_submit = 0; + if (s->inflight) + goto submit; + continue; + } else if (ret < to_submit) { + int diff = to_submit - ret; + + s->done += ret; + prepped -= diff; + goto submit_more; + } + s->done += ret; + prepped = 0; + continue; + } else if (ret < 0) { + if (errno == EAGAIN) { + if (s->finish) + break; + if (this_reap) + goto submit; + to_submit = 0; + goto submit; + } + printf("io_submit: %s\n", strerror(errno)); + break; + } + } while (!s->finish); + + finish = 1; + return NULL; +} + +static void sig_int(int sig) +{ + printf("Exiting on signal %d\n", sig); + submitters[0].finish = 1; + finish = 1; +} + +static void arm_sig_int(void) +{ + struct sigaction act; + + memset(&act, 0, sizeof(act)); + act.sa_handler = sig_int; + act.sa_flags = SA_RESTART; + sigaction(SIGINT, &act, NULL); +} + +static int setup_ring(struct submitter *s) +{ + struct io_sq_ring *sring = &s->sq_ring; + struct io_cq_ring *cring = &s->cq_ring; + struct io_uring_params p; + int ret, fd; + void *ptr; + + memset(&p, 0, sizeof(p)); + + if (polled && !do_nop) + p.flags |= IORING_SETUP_IOPOLL; + if (sq_thread_poll) { + p.flags |= IORING_SETUP_SQPOLL; + if (sq_thread_cpu != -1) { + p.flags |= IORING_SETUP_SQ_AFF; + p.sq_thread_cpu = sq_thread_cpu; + } + } + + fd = io_uring_setup(DEPTH, &p); + if (fd < 0) { + perror("io_uring_setup"); + return 1; + } + s->ring_fd = fd; + + if (fixedbufs) { + ret = io_uring_register_buffers(s); + if (ret < 0) { + perror("io_uring_register_buffers"); + return 1; + } + } + + if (register_files) { + ret = io_uring_register_files(s); + if (ret < 0) { + perror("io_uring_register_files"); + return 1; + } + } + + ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + IORING_OFF_SQ_RING); + printf("sq_ring ptr = 0x%p\n", ptr); + sring->head = ptr + p.sq_off.head; + sring->tail = ptr + p.sq_off.tail; + sring->ring_mask = ptr + p.sq_off.ring_mask; + sring->ring_entries = ptr + p.sq_off.ring_entries; + sring->flags = ptr + p.sq_off.flags; + sring->array = ptr + p.sq_off.array; + sq_ring_mask = *sring->ring_mask; + + s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + IORING_OFF_SQES); + printf("sqes ptr = 0x%p\n", s->sqes); + + ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + IORING_OFF_CQ_RING); + printf("cq_ring ptr = 0x%p\n", ptr); + cring->head = ptr + p.cq_off.head; + cring->tail = ptr + p.cq_off.tail; + cring->ring_mask = ptr + p.cq_off.ring_mask; + cring->ring_entries = ptr + p.cq_off.ring_entries; + cring->cqes = ptr + p.cq_off.cqes; + cq_ring_mask = *cring->ring_mask; + return 0; +} + +static void file_depths(char *buf) +{ + struct submitter *s = &submitters[0]; + unsigned i; + char *p; + + buf[0] = '\0'; + p = buf; + for (i = 0; i < s->nr_files; i++) { + struct file *f = &s->files[i]; + + if (i + 1 == s->nr_files) + p += sprintf(p, "%d", f->pending_ios); + else + p += sprintf(p, "%d, ", f->pending_ios); + } +} + +int main(int argc, char *argv[]) +{ + struct submitter *s = &submitters[0]; + unsigned long done, calls, reap, cache_hit, cache_miss; + int err, i, flags, fd; + char *fdepths; + void *ret; + + if (!do_nop && argc < 2) { + printf("%s: filename\n", argv[0]); + return 1; + } + + flags = O_RDONLY | O_NOATIME; + if (!buffered) + flags |= O_DIRECT; + + i = 1; + while (!do_nop && i < argc) { + struct file *f; + + if (s->nr_files == MAX_FDS) { + printf("Max number of files (%d) reached\n", MAX_FDS); + break; + } + fd = open(argv[i], flags); + if (fd < 0) { + perror("open"); + return 1; + } + + f = &s->files[s->nr_files]; + f->real_fd = fd; + if (get_file_size(f)) { + printf("failed getting size of device/file\n"); + return 1; + } + if (f->max_blocks <= 1) { + printf("Zero file/device size?\n"); + return 1; + } + f->max_blocks--; + + printf("Added file %s\n", argv[i]); + s->nr_files++; + i++; + } + + if (fixedbufs) { + struct rlimit rlim; + + rlim.rlim_cur = RLIM_INFINITY; + rlim.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) { + perror("setrlimit"); + return 1; + } + } + + arm_sig_int(); + + for (i = 0; i < DEPTH; i++) { + void *buf; + + if (posix_memalign(&buf, BS, BS)) { + printf("failed alloc\n"); + return 1; + } + s->iovecs[i].iov_base = buf; + s->iovecs[i].iov_len = BS; + } + + err = setup_ring(s); + if (err) { + printf("ring setup failed: %s, %d\n", strerror(errno), err); + return 1; + } + printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered); + printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries); + + pthread_create(&s->thread, NULL, submitter_fn, s); + + fdepths = malloc(8 * s->nr_files); + cache_hit = cache_miss = reap = calls = done = 0; + do { + unsigned long this_done = 0; + unsigned long this_reap = 0; + unsigned long this_call = 0; + unsigned long this_cache_hit = 0; + unsigned long this_cache_miss = 0; + unsigned long rpc = 0, ipc = 0; + double hit = 0.0; + + sleep(1); + this_done += s->done; + this_call += s->calls; + this_reap += s->reaps; + this_cache_hit += s->cachehit; + this_cache_miss += s->cachemiss; + if (this_cache_hit && this_cache_miss) { + unsigned long hits, total; + + hits = this_cache_hit - cache_hit; + total = hits + this_cache_miss - cache_miss; + hit = (double) hits / (double) total; + hit *= 100.0; + } + if (this_call - calls) { + rpc = (this_done - done) / (this_call - calls); + ipc = (this_reap - reap) / (this_call - calls); + } else + rpc = ipc = -1; + file_depths(fdepths); + printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n", + this_done - done, rpc, ipc, s->inflight, + fdepths, hit); + done = this_done; + calls = this_call; + reap = this_reap; + cache_hit = s->cachehit; + cache_miss = s->cachemiss; + } while (!finish); + + pthread_join(s->thread, &ret); + close(s->ring_fd); + free(fdepths); + return 0; +} diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c new file mode 100644 index 000000000000..633f65bb43a7 --- /dev/null +++ b/tools/io_uring/io_uring-cp.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple test program that demonstrates a file copy through io_uring. This + * uses the API exposed by liburing. + * + * Copyright (C) 2018-2019 Jens Axboe + */ +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <inttypes.h> +#include <sys/stat.h> +#include <sys/ioctl.h> + +#include "liburing.h" + +#define QD 64 +#define BS (32*1024) + +static int infd, outfd; + +struct io_data { + int read; + off_t first_offset, offset; + size_t first_len; + struct iovec iov; +}; + +static int setup_context(unsigned entries, struct io_uring *ring) +{ + int ret; + + ret = io_uring_queue_init(entries, ring, 0); + if (ret < 0) { + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); + return -1; + } + + return 0; +} + +static int get_file_size(int fd, off_t *size) +{ + struct stat st; + + if (fstat(fd, &st) < 0) + return -1; + if (S_ISREG(st.st_mode)) { + *size = st.st_size; + return 0; + } else if (S_ISBLK(st.st_mode)) { + unsigned long long bytes; + + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) + return -1; + + *size = bytes; + return 0; + } + + return -1; +} + +static void queue_prepped(struct io_uring *ring, struct io_data *data) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + assert(sqe); + + if (data->read) + io_uring_prep_readv(sqe, infd, &data->iov, 1, data->offset); + else + io_uring_prep_writev(sqe, outfd, &data->iov, 1, data->offset); + + io_uring_sqe_set_data(sqe, data); +} + +static int queue_read(struct io_uring *ring, off_t size, off_t offset) +{ + struct io_uring_sqe *sqe; + struct io_data *data; + + sqe = io_uring_get_sqe(ring); + if (!sqe) + return 1; + + data = malloc(size + sizeof(*data)); + data->read = 1; + data->offset = data->first_offset = offset; + + data->iov.iov_base = data + 1; + data->iov.iov_len = size; + data->first_len = size; + + io_uring_prep_readv(sqe, infd, &data->iov, 1, offset); + io_uring_sqe_set_data(sqe, data); + return 0; +} + +static void queue_write(struct io_uring *ring, struct io_data *data) +{ + data->read = 0; + data->offset = data->first_offset; + + data->iov.iov_base = data + 1; + data->iov.iov_len = data->first_len; + + queue_prepped(ring, data); + io_uring_submit(ring); +} + +static int copy_file(struct io_uring *ring, off_t insize) +{ + unsigned long reads, writes; + struct io_uring_cqe *cqe; + off_t write_left, offset; + int ret; + + write_left = insize; + writes = reads = offset = 0; + + while (insize || write_left) { + unsigned long had_reads; + int got_comp; + + /* + * Queue up as many reads as we can + */ + had_reads = reads; + while (insize) { + off_t this_size = insize; + + if (reads + writes >= QD) + break; + if (this_size > BS) + this_size = BS; + else if (!this_size) + break; + + if (queue_read(ring, this_size, offset)) + break; + + insize -= this_size; + offset += this_size; + reads++; + } + + if (had_reads != reads) { + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); + break; + } + } + + /* + * Queue is full at this point. Find at least one completion. + */ + got_comp = 0; + while (write_left) { + struct io_data *data; + + if (!got_comp) { + ret = io_uring_wait_completion(ring, &cqe); + got_comp = 1; + } else + ret = io_uring_get_completion(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "io_uring_get_completion: %s\n", + strerror(-ret)); + return 1; + } + if (!cqe) + break; + + data = (struct io_data *) (uintptr_t) cqe->user_data; + if (cqe->res < 0) { + if (cqe->res == -EAGAIN) { + queue_prepped(ring, data); + continue; + } + fprintf(stderr, "cqe failed: %s\n", + strerror(-cqe->res)); + return 1; + } else if ((size_t) cqe->res != data->iov.iov_len) { + /* Short read/write, adjust and requeue */ + data->iov.iov_base += cqe->res; + data->iov.iov_len -= cqe->res; + data->offset += cqe->res; + queue_prepped(ring, data); + continue; + } + + /* + * All done. if write, nothing else to do. if read, + * queue up corresponding write. + */ + if (data->read) { + queue_write(ring, data); + write_left -= data->first_len; + reads--; + writes++; + } else { + free(data); + writes--; + } + } + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + off_t insize; + int ret; + + if (argc < 3) { + printf("%s: infile outfile\n", argv[0]); + return 1; + } + + infd = open(argv[1], O_RDONLY); + if (infd < 0) { + perror("open infile"); + return 1; + } + outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (outfd < 0) { + perror("open outfile"); + return 1; + } + + if (setup_context(QD, &ring)) + return 1; + if (get_file_size(infd, &insize)) + return 1; + + ret = copy_file(&ring, insize); + + close(infd); + close(outfd); + io_uring_queue_exit(&ring); + return ret; +} diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h new file mode 100644 index 000000000000..cab0f50257ba --- /dev/null +++ b/tools/io_uring/liburing.h @@ -0,0 +1,143 @@ +#ifndef LIB_URING_H +#define LIB_URING_H + +#include <sys/uio.h> +#include <signal.h> +#include <string.h> +#include "../../include/uapi/linux/io_uring.h" + +/* + * Library interface to io_uring + */ +struct io_uring_sq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *kflags; + unsigned *kdropped; + unsigned *array; + struct io_uring_sqe *sqes; + + unsigned sqe_head; + unsigned sqe_tail; + + size_t ring_sz; +}; + +struct io_uring_cq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *koverflow; + struct io_uring_cqe *cqes; + + size_t ring_sz; +}; + +struct io_uring { + struct io_uring_sq sq; + struct io_uring_cq cq; + int ring_fd; +}; + +/* + * System calls + */ +extern int io_uring_setup(unsigned entries, struct io_uring_params *p); +extern int io_uring_enter(unsigned fd, unsigned to_submit, + unsigned min_complete, unsigned flags, sigset_t *sig); +extern int io_uring_register(int fd, unsigned int opcode, void *arg, + unsigned int nr_args); + +/* + * Library interface + */ +extern int io_uring_queue_init(unsigned entries, struct io_uring *ring, + unsigned flags); +extern int io_uring_queue_mmap(int fd, struct io_uring_params *p, + struct io_uring *ring); +extern void io_uring_queue_exit(struct io_uring *ring); +extern int io_uring_get_completion(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr); +extern int io_uring_wait_completion(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr); +extern int io_uring_submit(struct io_uring *ring); +extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring); + +/* + * Command prep helpers + */ +static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data) +{ + sqe->user_data = (unsigned long) data; +} + +static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, + void *addr, unsigned len, off_t offset) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = op; + sqe->fd = fd; + sqe->off = offset; + sqe->addr = (unsigned long) addr; + sqe->len = len; +} + +static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, + struct iovec *iovecs, unsigned nr_vecs, + off_t offset) +{ + io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); +} + +static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, + void *buf, unsigned nbytes, + off_t offset) +{ + io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset); +} + +static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, + struct iovec *iovecs, unsigned nr_vecs, + off_t offset) +{ + io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset); +} + +static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, + void *buf, unsigned nbytes, + off_t offset) +{ + io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); +} + +static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, + short poll_mask) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_POLL_ADD; + sqe->fd = fd; + sqe->poll_events = poll_mask; +} + +static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe, + void *user_data) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_POLL_REMOVE; + sqe->addr = (unsigned long) user_data; +} + +static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, + int datasync) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_FSYNC; + sqe->fd = fd; + if (datasync) + sqe->fsync_flags = IORING_FSYNC_DATASYNC; +} + +#endif diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c new file mode 100644 index 000000000000..88505e873ad9 --- /dev/null +++ b/tools/io_uring/queue.c @@ -0,0 +1,164 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> + +#include "liburing.h" +#include "barrier.h" + +static int __io_uring_get_completion(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, int wait) +{ + struct io_uring_cq *cq = &ring->cq; + const unsigned mask = *cq->kring_mask; + unsigned head; + int ret; + + *cqe_ptr = NULL; + head = *cq->khead; + do { + /* + * It's necessary to use a read_barrier() before reading + * the CQ tail, since the kernel updates it locklessly. The + * kernel has the matching store barrier for the update. The + * kernel also ensures that previous stores to CQEs are ordered + * with the tail update. + */ + read_barrier(); + if (head != *cq->ktail) { + *cqe_ptr = &cq->cqes[head & mask]; + break; + } + if (!wait) + break; + ret = io_uring_enter(ring->ring_fd, 0, 1, + IORING_ENTER_GETEVENTS, NULL); + if (ret < 0) + return -errno; + } while (1); + + if (*cqe_ptr) { + *cq->khead = head + 1; + /* + * Ensure that the kernel sees our new head, the kernel has + * the matching read barrier. + */ + write_barrier(); + } + + return 0; +} + +/* + * Return an IO completion, if one is readily available + */ +int io_uring_get_completion(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr) +{ + return __io_uring_get_completion(ring, cqe_ptr, 0); +} + +/* + * Return an IO completion, waiting for it if necessary + */ +int io_uring_wait_completion(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr) +{ + return __io_uring_get_completion(ring, cqe_ptr, 1); +} + +/* + * Submit sqes acquired from io_uring_get_sqe() to the kernel. + * + * Returns number of sqes submitted + */ +int io_uring_submit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + const unsigned mask = *sq->kring_mask; + unsigned ktail, ktail_next, submitted; + int ret; + + /* + * If we have pending IO in the kring, submit it first. We need a + * read barrier here to match the kernels store barrier when updating + * the SQ head. + */ + read_barrier(); + if (*sq->khead != *sq->ktail) { + submitted = *sq->kring_entries; + goto submit; + } + + if (sq->sqe_head == sq->sqe_tail) + return 0; + + /* + * Fill in sqes that we have queued up, adding them to the kernel ring + */ + submitted = 0; + ktail = ktail_next = *sq->ktail; + while (sq->sqe_head < sq->sqe_tail) { + ktail_next++; + read_barrier(); + + sq->array[ktail & mask] = sq->sqe_head & mask; + ktail = ktail_next; + + sq->sqe_head++; + submitted++; + } + + if (!submitted) + return 0; + + if (*sq->ktail != ktail) { + /* + * First write barrier ensures that the SQE stores are updated + * with the tail update. This is needed so that the kernel + * will never see a tail update without the preceeding sQE + * stores being done. + */ + write_barrier(); + *sq->ktail = ktail; + /* + * The kernel has the matching read barrier for reading the + * SQ tail. + */ + write_barrier(); + } + +submit: + ret = io_uring_enter(ring->ring_fd, submitted, 0, + IORING_ENTER_GETEVENTS, NULL); + if (ret < 0) + return -errno; + + return 0; +} + +/* + * Return an sqe to fill. Application must later call io_uring_submit() + * when it's ready to tell the kernel about it. The caller may call this + * function multiple times before calling io_uring_submit(). + * + * Returns a vacant sqe, or NULL if we're full. + */ +struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + unsigned next = sq->sqe_tail + 1; + struct io_uring_sqe *sqe; + + /* + * All sqes are used + */ + if (next - sq->sqe_head > *sq->kring_entries) + return NULL; + + sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask]; + sq->sqe_tail = next; + return sqe; +} diff --git a/tools/io_uring/setup.c b/tools/io_uring/setup.c new file mode 100644 index 000000000000..4da19a77132c --- /dev/null +++ b/tools/io_uring/setup.c @@ -0,0 +1,103 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> + +#include "liburing.h" + +static int io_uring_mmap(int fd, struct io_uring_params *p, + struct io_uring_sq *sq, struct io_uring_cq *cq) +{ + size_t size; + void *ptr; + int ret; + + sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); + if (ptr == MAP_FAILED) + return -errno; + sq->khead = ptr + p->sq_off.head; + sq->ktail = ptr + p->sq_off.tail; + sq->kring_mask = ptr + p->sq_off.ring_mask; + sq->kring_entries = ptr + p->sq_off.ring_entries; + sq->kflags = ptr + p->sq_off.flags; + sq->kdropped = ptr + p->sq_off.dropped; + sq->array = ptr + p->sq_off.array; + + size = p->sq_entries * sizeof(struct io_uring_sqe), + sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, + IORING_OFF_SQES); + if (sq->sqes == MAP_FAILED) { + ret = -errno; +err: + munmap(sq->khead, sq->ring_sz); + return ret; + } + + cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); + if (ptr == MAP_FAILED) { + ret = -errno; + munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); + goto err; + } + cq->khead = ptr + p->cq_off.head; + cq->ktail = ptr + p->cq_off.tail; + cq->kring_mask = ptr + p->cq_off.ring_mask; + cq->kring_entries = ptr + p->cq_off.ring_entries; + cq->koverflow = ptr + p->cq_off.overflow; + cq->cqes = ptr + p->cq_off.cqes; + return 0; +} + +/* + * For users that want to specify sq_thread_cpu or sq_thread_idle, this + * interface is a convenient helper for mmap()ing the rings. + * Returns -1 on error, or zero on success. On success, 'ring' + * contains the necessary information to read/write to the rings. + */ +int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring) +{ + int ret; + + memset(ring, 0, sizeof(*ring)); + ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); + if (!ret) + ring->ring_fd = fd; + return ret; +} + +/* + * Returns -1 on error, or zero on success. On success, 'ring' + * contains the necessary information to read/write to the rings. + */ +int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags) +{ + struct io_uring_params p; + int fd; + + memset(&p, 0, sizeof(p)); + p.flags = flags; + + fd = io_uring_setup(entries, &p); + if (fd < 0) + return fd; + + return io_uring_queue_mmap(fd, &p, ring); +} + +void io_uring_queue_exit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + struct io_uring_cq *cq = &ring->cq; + + munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); + munmap(sq->khead, sq->ring_sz); + munmap(cq->khead, cq->ring_sz); + close(ring->ring_fd); +} diff --git a/tools/io_uring/syscall.c b/tools/io_uring/syscall.c new file mode 100644 index 000000000000..6b835e5c6a5b --- /dev/null +++ b/tools/io_uring/syscall.c @@ -0,0 +1,40 @@ +/* + * Will go away once libc support is there + */ +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <signal.h> +#include "liburing.h" + +#if defined(__x86_64) || defined(__i386__) +#ifndef __NR_sys_io_uring_setup +#define __NR_sys_io_uring_setup 425 +#endif +#ifndef __NR_sys_io_uring_enter +#define __NR_sys_io_uring_enter 426 +#endif +#ifndef __NR_sys_io_uring_register +#define __NR_sys_io_uring_register 427 +#endif +#else +#error "Arch not supported yet" +#endif + +int io_uring_register(int fd, unsigned int opcode, void *arg, + unsigned int nr_args) +{ + return syscall(__NR_sys_io_uring_register, fd, opcode, arg, nr_args); +} + +int io_uring_setup(unsigned entries, struct io_uring_params *p) +{ + return syscall(__NR_sys_io_uring_setup, entries, p); +} + +int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete, + unsigned flags, sigset_t *sig) +{ + return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete, + flags, sig, _NSIG / 8); +} diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 44195514b19e..98ad783efc69 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -34,6 +34,7 @@ #include <sys/types.h> #include <linux/kernel.h> #include <linux/time64.h> +#include <linux/numa.h> #include <numa.h> #include <numaif.h> @@ -298,7 +299,7 @@ static cpu_set_t bind_to_node(int target_node) CPU_ZERO(&mask); - if (target_node == -1) { + if (target_node == NUMA_NO_NODE) { for (cpu = 0; cpu < g->p.nr_cpus; cpu++) CPU_SET(cpu, &mask); } else { @@ -339,7 +340,7 @@ static void bind_to_memnode(int node) unsigned long nodemask; int ret; - if (node == -1) + if (node == NUMA_NO_NODE) return; BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); @@ -1363,7 +1364,7 @@ static void init_thread_data(void) int cpu; /* Allow all nodes by default: */ - td->bind_node = -1; + td->bind_node = NUMA_NO_NODE; /* Allow all CPUs by default: */ CPU_ZERO(&td->bind_cpumask); diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index ff8025de8237..28c11c6b4d06 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -3,7 +3,7 @@ * * Module Name: cfsize - Common get file size function * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 7be89b873661..6b41d8b64a00 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -3,7 +3,7 @@ * * Module Name: getopt * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index a20c703f8b7d..2a1fd9182f94 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -3,7 +3,7 @@ * * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index 4fd44e218a83..30913f124dd5 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -3,7 +3,7 @@ * * Module Name: osunixdir - Unix directory access interfaces * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 93d8359309b6..29dfb47adfeb 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -3,7 +3,7 @@ * * Module Name: osunixmap - Unix OSL for file mappings * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 8a88e87778bd..83d3b3b829b8 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -3,7 +3,7 @@ * * Module Name: osunixxf - UNIX OSL interfaces * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index f69f1f559743..2eb0aaa4f462 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -3,7 +3,7 @@ * * Module Name: acpidump.h - Include file for acpi_dump utility * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index a9848de32d8e..e256c2ac5ddc 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -3,7 +3,7 @@ * * Module Name: apdump - Dump routines for ACPI tables (acpidump) * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index e86207e5afcf..49972bc78bc5 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -3,7 +3,7 @@ * * Module Name: apfiles - File-related functions for acpidump utility * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 2d9b94b631cb..d8f1b57537d3 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -3,7 +3,7 @@ * * Module Name: apmain - Main module for the acpidump utility * - * Copyright (C) 2000 - 2018, Intel Corp. + * Copyright (C) 2000 - 2019, Intel Corp. * *****************************************************************************/ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 400ee81a3043..313dc1091e6f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -22,6 +22,7 @@ TARGETS += ir TARGETS += kcmp TARGETS += kvm TARGETS += lib +TARGETS += livepatch TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug @@ -48,6 +49,7 @@ TARGETS += sysctl ifneq (1, $(quicktest)) TARGETS += timers endif +TARGETS += tmpfs TARGETS += user TARGETS += vm TARGETS += x86 diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config index 913a25a4a32b..bf634dda0720 100644 --- a/tools/testing/selftests/firmware/config +++ b/tools/testing/selftests/firmware/config @@ -1,6 +1,5 @@ CONFIG_TEST_FIRMWARE=y CONFIG_FW_LOADER=y CONFIG_FW_LOADER_USER_HELPER=y -CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index 466cf2f91ba0..a4320c4b44dc 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -155,8 +155,11 @@ read_firmwares() { for i in $(seq 0 3); do config_set_read_fw_idx $i - # Verify the contents match - if ! diff -q "$FW" $DIR/read_firmware 2>/dev/null ; then + # Verify the contents are what we expect. + # -Z required for now -- check for yourself, md5sum + # on $FW and DIR/read_firmware will yield the same. Even + # cmp agrees, so something is off. + if ! diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then echo "request #$i: firmware was not loaded" >&2 exit 1 fi @@ -168,7 +171,7 @@ read_firmwares_expect_nofile() for i in $(seq 0 3); do config_set_read_fw_idx $i # Ensures contents differ - if diff -q "$FW" $DIR/read_firmware 2>/dev/null ; then + if diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then echo "request $i: file was not expected to match" >&2 exit 1 fi diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 6c5f1b2ffb74..1cbb12e284a6 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -91,7 +91,7 @@ verify_reqs() if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "usermode helper disabled so ignoring test" - exit $ksft_skip + exit 0 fi fi } diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 75244db70331..136387422b00 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -154,17 +154,17 @@ fi # Define text colors # Check available colors on the terminal, if any -ncolors=`tput colors 2>/dev/null` +ncolors=`tput colors 2>/dev/null || echo 0` color_reset= color_red= color_green= color_blue= # If stdout exists and number of colors is eight or more, use them -if [ -t 1 -a "$ncolors" -a "$ncolors" -ge 8 ]; then - color_reset="\e[0m" - color_red="\e[31m" - color_green="\e[32m" - color_blue="\e[34m" +if [ -t 1 -a "$ncolors" -ge 8 ]; then + color_reset="\033[0m" + color_red="\033[31m" + color_green="\033[32m" + color_blue="\033[34m" fi strip_esc() { @@ -173,8 +173,13 @@ strip_esc() { } prlog() { # messages - echo -e "$@" - [ "$LOG_FILE" ] && echo -e "$@" | strip_esc >> $LOG_FILE + newline="\n" + if [ "$1" = "-n" ] ; then + newline= + shift + fi + printf "$*$newline" + [ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE } catlog() { #file cat $1 diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c index 858c19caf224..e700e09e3682 100644 --- a/tools/testing/selftests/ir/ir_loopback.c +++ b/tools/testing/selftests/ir/ir_loopback.c @@ -27,6 +27,8 @@ #define TEST_SCANCODES 10 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define SYSFS_PATH_MAX 256 +#define DNAME_PATH_MAX 256 static const struct { enum rc_proto proto; @@ -51,12 +53,16 @@ static const struct { { RC_PROTO_RC6_6A_32, "rc-6-6a-32", 0xffffffff, "rc-6" }, { RC_PROTO_RC6_MCE, "rc-6-mce", 0x00007fff, "rc-6" }, { RC_PROTO_SHARP, "sharp", 0x1fff, "sharp" }, + { RC_PROTO_IMON, "imon", 0x7fffffff, "imon" }, + { RC_PROTO_RCMM12, "rcmm-12", 0x00000fff, "rcmm" }, + { RC_PROTO_RCMM24, "rcmm-24", 0x00ffffff, "rcmm" }, + { RC_PROTO_RCMM32, "rcmm-32", 0xffffffff, "rcmm" }, }; int lirc_open(const char *rc) { struct dirent *dent; - char buf[100]; + char buf[SYSFS_PATH_MAX + DNAME_PATH_MAX]; DIR *d; int fd; @@ -74,7 +80,7 @@ int lirc_open(const char *rc) } if (!dent) - ksft_exit_fail_msg("cannot find lirc device for %s\n", rc); + ksft_exit_skip("cannot find lirc device for %s\n", rc); closedir(d); @@ -139,6 +145,11 @@ int main(int argc, char **argv) (((scancode >> 8) ^ ~scancode) & 0xff) == 0) continue; + if (rc_proto == RC_PROTO_RCMM32 && + (scancode & 0x000c0000) != 0x000c0000 && + scancode & 0x00008000) + continue; + struct lirc_scancode lsc = { .rc_proto = rc_proto, .scancode = scancode diff --git a/tools/testing/selftests/ir/ir_loopback.sh b/tools/testing/selftests/ir/ir_loopback.sh index 0a0b8dfa39be..b90dc9939f45 100755 --- a/tools/testing/selftests/ir/ir_loopback.sh +++ b/tools/testing/selftests/ir/ir_loopback.sh @@ -4,6 +4,11 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +if [ $UID != 0 ]; then + echo "Please run ir_loopback test as root [SKIP]" + exit $ksft_skip +fi + if ! /sbin/modprobe -q -n rc-loopback; then echo "ir_loopback: module rc-loopback is not found [SKIP]" exit $ksft_skip diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index a3edb2c8e43d..47e1d995c182 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -13,6 +13,7 @@ #include <stdlib.h> #include <unistd.h> #include <stdarg.h> +#include <stdio.h> /* define kselftest exit codes */ #define KSFT_PASS 0 diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 76d654ef3234..2d90c98eeb67 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -168,8 +168,8 @@ #define __TEST_IMPL(test_name, _signal) \ static void test_name(struct __test_metadata *_metadata); \ static struct __test_metadata _##test_name##_object = \ - { name: "global." #test_name, \ - fn: &test_name, termsig: _signal }; \ + { .name = "global." #test_name, \ + .fn = &test_name, .termsig = _signal }; \ static void __attribute__((constructor)) _register_##test_name(void) \ { \ __register_test(&_##test_name##_object); \ @@ -304,9 +304,9 @@ } \ static struct __test_metadata \ _##fixture_name##_##test_name##_object = { \ - name: #fixture_name "." #test_name, \ - fn: &wrapper_##fixture_name##_##test_name, \ - termsig: signal, \ + .name = #fixture_name "." #test_name, \ + .fn = &wrapper_##fixture_name##_##test_name, \ + .termsig = signal, \ }; \ static void __attribute__((constructor)) \ _register_##fixture_name##_##test_name(void) \ diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile new file mode 100644 index 000000000000..af4aee79bebb --- /dev/null +++ b/tools/testing/selftests/livepatch/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := \ + test-livepatch.sh \ + test-callbacks.sh \ + test-shadow-vars.sh + +include ../lib.mk diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README new file mode 100644 index 000000000000..b73cd0e2dd51 --- /dev/null +++ b/tools/testing/selftests/livepatch/README @@ -0,0 +1,43 @@ +==================== +Livepatch Self Tests +==================== + +This is a small set of sanity tests for the kernel livepatching. + +The test suite loads and unloads several test kernel modules to verify +livepatch behavior. Debug information is logged to the kernel's message +buffer and parsed for expected messages. (Note: the tests will clear +the message buffer between individual tests.) + + +Config +------ + +Set these config options and their prerequisites: + +CONFIG_LIVEPATCH=y +CONFIG_TEST_LIVEPATCH=m + + +Running the tests +----------------- + +Test kernel modules are built as part of lib/ (make modules) and need to +be installed (make modules_install) as the test scripts will modprobe +them. + +To run the livepatch selftests, from the top of the kernel source tree: + + % make -C tools/testing/selftests TARGETS=livepatch run_tests + + +Adding tests +------------ + +See the common functions.sh file for the existing collection of utility +functions, most importantly set_dynamic_debug() and check_result(). The +latter function greps the kernel's ring buffer for "livepatch:" and +"test_klp" strings, so tests be sure to include one of those strings for +result comparison. Other utility functions include general module +loading and livepatch loading helpers (waiting for patch transitions, +sysfs entries, etc.) diff --git a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config new file mode 100644 index 000000000000..0dd7700464a8 --- /dev/null +++ b/tools/testing/selftests/livepatch/config @@ -0,0 +1 @@ +CONFIG_TEST_LIVEPATCH=m diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh new file mode 100644 index 000000000000..30195449c63c --- /dev/null +++ b/tools/testing/selftests/livepatch/functions.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +# Shell functions for the rest of the scripts. + +MAX_RETRIES=600 +RETRY_INTERVAL=".1" # seconds + +# log(msg) - write message to kernel log +# msg - insightful words +function log() { + echo "$1" > /dev/kmsg +} + +# die(msg) - game over, man +# msg - dying words +function die() { + log "ERROR: $1" + echo "ERROR: $1" >&2 + exit 1 +} + +# set_dynamic_debug() - setup kernel dynamic debug +# TODO - push and pop this config? +function set_dynamic_debug() { + cat << EOF > /sys/kernel/debug/dynamic_debug/control +file kernel/livepatch/* +p +func klp_try_switch_task -p +EOF +} + +# loop_until(cmd) - loop a command until it is successful or $MAX_RETRIES, +# sleep $RETRY_INTERVAL between attempts +# cmd - command and its arguments to run +function loop_until() { + local cmd="$*" + local i=0 + while true; do + eval "$cmd" && return 0 + [[ $((i++)) -eq $MAX_RETRIES ]] && return 1 + sleep $RETRY_INTERVAL + done +} + +function is_livepatch_mod() { + local mod="$1" + + if [[ $(modinfo "$mod" | awk '/^livepatch:/{print $NF}') == "Y" ]]; then + return 0 + fi + + return 1 +} + +function __load_mod() { + local mod="$1"; shift + + local msg="% modprobe $mod $*" + log "${msg%% }" + ret=$(modprobe "$mod" "$@" 2>&1) + if [[ "$ret" != "" ]]; then + die "$ret" + fi + + # Wait for module in sysfs ... + loop_until '[[ -e "/sys/module/$mod" ]]' || + die "failed to load module $mod" +} + + +# load_mod(modname, params) - load a kernel module +# modname - module name to load +# params - module parameters to pass to modprobe +function load_mod() { + local mod="$1"; shift + + is_livepatch_mod "$mod" && + die "use load_lp() to load the livepatch module $mod" + + __load_mod "$mod" "$@" +} + +# load_lp_nowait(modname, params) - load a kernel module with a livepatch +# but do not wait on until the transition finishes +# modname - module name to load +# params - module parameters to pass to modprobe +function load_lp_nowait() { + local mod="$1"; shift + + is_livepatch_mod "$mod" || + die "module $mod is not a livepatch" + + __load_mod "$mod" "$@" + + # Wait for livepatch in sysfs ... + loop_until '[[ -e "/sys/kernel/livepatch/$mod" ]]' || + die "failed to load module $mod (sysfs)" +} + +# load_lp(modname, params) - load a kernel module with a livepatch +# modname - module name to load +# params - module parameters to pass to modprobe +function load_lp() { + local mod="$1"; shift + + load_lp_nowait "$mod" "$@" + + # Wait until the transition finishes ... + loop_until 'grep -q '^0$' /sys/kernel/livepatch/$mod/transition' || + die "failed to complete transition" +} + +# load_failing_mod(modname, params) - load a kernel module, expect to fail +# modname - module name to load +# params - module parameters to pass to modprobe +function load_failing_mod() { + local mod="$1"; shift + + local msg="% modprobe $mod $*" + log "${msg%% }" + ret=$(modprobe "$mod" "$@" 2>&1) + if [[ "$ret" == "" ]]; then + die "$mod unexpectedly loaded" + fi + log "$ret" +} + +# unload_mod(modname) - unload a kernel module +# modname - module name to unload +function unload_mod() { + local mod="$1" + + # Wait for module reference count to clear ... + loop_until '[[ $(cat "/sys/module/$mod/refcnt") == "0" ]]' || + die "failed to unload module $mod (refcnt)" + + log "% rmmod $mod" + ret=$(rmmod "$mod" 2>&1) + if [[ "$ret" != "" ]]; then + die "$ret" + fi + + # Wait for module in sysfs ... + loop_until '[[ ! -e "/sys/module/$mod" ]]' || + die "failed to unload module $mod (/sys/module)" +} + +# unload_lp(modname) - unload a kernel module with a livepatch +# modname - module name to unload +function unload_lp() { + unload_mod "$1" +} + +# disable_lp(modname) - disable a livepatch +# modname - module name to unload +function disable_lp() { + local mod="$1" + + log "% echo 0 > /sys/kernel/livepatch/$mod/enabled" + echo 0 > /sys/kernel/livepatch/"$mod"/enabled + + # Wait until the transition finishes and the livepatch gets + # removed from sysfs... + loop_until '[[ ! -e "/sys/kernel/livepatch/$mod" ]]' || + die "failed to disable livepatch $mod" +} + +# set_pre_patch_ret(modname, pre_patch_ret) +# modname - module name to set +# pre_patch_ret - new pre_patch_ret value +function set_pre_patch_ret { + local mod="$1"; shift + local ret="$1" + + log "% echo $ret > /sys/module/$mod/parameters/pre_patch_ret" + echo "$ret" > /sys/module/"$mod"/parameters/pre_patch_ret + + # Wait for sysfs value to hold ... + loop_until '[[ $(cat "/sys/module/$mod/parameters/pre_patch_ret") == "$ret" ]]' || + die "failed to set pre_patch_ret parameter for $mod module" +} + +# check_result() - verify dmesg output +# TODO - better filter, out of order msgs, etc? +function check_result { + local expect="$*" + local result + + result=$(dmesg | grep -v 'tainting' | grep -e 'livepatch:' -e 'test_klp' | sed 's/^\[[ 0-9.]*\] //') + + if [[ "$expect" == "$result" ]] ; then + echo "ok" + else + echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n" + die "livepatch kselftest(s) failed" + fi +} diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh new file mode 100755 index 000000000000..e97a9dcb73c7 --- /dev/null +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -0,0 +1,587 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +. $(dirname $0)/functions.sh + +MOD_LIVEPATCH=test_klp_callbacks_demo +MOD_LIVEPATCH2=test_klp_callbacks_demo2 +MOD_TARGET=test_klp_callbacks_mod +MOD_TARGET_BUSY=test_klp_callbacks_busy + +set_dynamic_debug + + +# TEST: target module before livepatch +# +# Test a combination of loading a kernel module and a livepatch that +# patches a function in the first module. Load the target module +# before the livepatch module. Unload them in the same order. +# +# - On livepatch enable, before the livepatch transition starts, +# pre-patch callbacks are executed for vmlinux and $MOD_TARGET (those +# klp_objects currently loaded). After klp_objects are patched +# according to the klp_patch, their post-patch callbacks run and the +# transition completes. +# +# - Similarly, on livepatch disable, pre-patch callbacks run before the +# unpatching transition starts. klp_objects are reverted, post-patch +# callbacks execute and the transition completes. + +echo -n "TEST: target module before livepatch ... " +dmesg -C + +load_mod $MOD_TARGET +load_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH +unload_mod $MOD_TARGET + +check_result "% modprobe $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_init +% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH +% rmmod $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_exit" + + +# TEST: module_coming notifier +# +# This test is similar to the previous test, but (un)load the livepatch +# module before the target kernel module. This tests the livepatch +# core's module_coming handler. +# +# - On livepatch enable, only pre/post-patch callbacks are executed for +# currently loaded klp_objects, in this case, vmlinux. +# +# - When a targeted module is subsequently loaded, only its +# pre/post-patch callbacks are executed. +# +# - On livepatch disable, all currently loaded klp_objects' (vmlinux and +# $MOD_TARGET) pre/post-unpatch callbacks are executed. + +echo -n "TEST: module_coming notifier ... " +dmesg -C + +load_lp $MOD_LIVEPATCH +load_mod $MOD_TARGET +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH +unload_mod $MOD_TARGET + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': patching complete +% modprobe $MOD_TARGET +livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET' +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +$MOD_TARGET: ${MOD_TARGET}_init +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH +% rmmod $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_exit" + + +# TEST: module_going notifier +# +# Test loading the livepatch after a targeted kernel module, then unload +# the kernel module before disabling the livepatch. This tests the +# livepatch core's module_going handler. +# +# - First load a target module, then the livepatch. +# +# - When a target module is unloaded, the livepatch is only reverted +# from that klp_object ($MOD_TARGET). As such, only its pre and +# post-unpatch callbacks are executed when this occurs. +# +# - When the livepatch is disabled, pre and post-unpatch callbacks are +# run for the remaining klp_object, vmlinux. + +echo -n "TEST: module_going notifier ... " +dmesg -C + +load_mod $MOD_TARGET +load_lp $MOD_LIVEPATCH +unload_mod $MOD_TARGET +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% modprobe $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_init +% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': patching complete +% rmmod $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_exit +$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away +livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +# TEST: module_coming and module_going notifiers +# +# This test is similar to the previous test, however the livepatch is +# loaded first. This tests the livepatch core's module_coming and +# module_going handlers. +# +# - First load the livepatch. +# +# - When a targeted kernel module is subsequently loaded, only its +# pre/post-patch callbacks are executed. +# +# - When the target module is unloaded, the livepatch is only reverted +# from the $MOD_TARGET klp_object. As such, only pre and +# post-unpatch callbacks are executed when this occurs. + +echo -n "TEST: module_coming and module_going notifiers ... " +dmesg -C + +load_lp $MOD_LIVEPATCH +load_mod $MOD_TARGET +unload_mod $MOD_TARGET +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': patching complete +% modprobe $MOD_TARGET +livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET' +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +$MOD_TARGET: ${MOD_TARGET}_init +% rmmod $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_exit +$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away +livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +# TEST: target module not present +# +# A simple test of loading a livepatch without one of its patch target +# klp_objects ever loaded ($MOD_TARGET). +# +# - Load the livepatch. +# +# - As expected, only pre/post-(un)patch handlers are executed for +# vmlinux. + +echo -n "TEST: target module not present ... " +dmesg -C + +load_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +# TEST: pre-patch callback -ENODEV +# +# Test a scenario where a vmlinux pre-patch callback returns a non-zero +# status (ie, failure). +# +# - First load a target module. +# +# - Load the livepatch module, setting its 'pre_patch_ret' value to -19 +# (-ENODEV). When its vmlinux pre-patch callback executes, this +# status code will propagate back to the module-loading subsystem. +# The result is that the insmod command refuses to load the livepatch +# module. + +echo -n "TEST: pre-patch callback -ENODEV ... " +dmesg -C + +load_mod $MOD_TARGET +load_failing_mod $MOD_LIVEPATCH pre_patch_ret=-19 +unload_mod $MOD_TARGET + +check_result "% modprobe $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_init +% modprobe $MOD_LIVEPATCH pre_patch_ret=-19 +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +test_klp_callbacks_demo: pre_patch_callback: vmlinux +livepatch: pre-patch callback failed for object 'vmlinux' +livepatch: failed to enable patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +modprobe: ERROR: could not insert '$MOD_LIVEPATCH': No such device +% rmmod $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_exit" + + +# TEST: module_coming + pre-patch callback -ENODEV +# +# Similar to the previous test, setup a livepatch such that its vmlinux +# pre-patch callback returns success. However, when a targeted kernel +# module is later loaded, have the livepatch return a failing status +# code. +# +# - Load the livepatch, vmlinux pre-patch callback succeeds. +# +# - Set a trap so subsequent pre-patch callbacks to this livepatch will +# return -ENODEV. +# +# - The livepatch pre-patch callback for subsequently loaded target +# modules will return failure, so the module loader refuses to load +# the kernel module. No post-patch or pre/post-unpatch callbacks are +# executed for this klp_object. +# +# - Pre/post-unpatch callbacks are run for the vmlinux klp_object. + +echo -n "TEST: module_coming + pre-patch callback -ENODEV ... " +dmesg -C + +load_lp $MOD_LIVEPATCH +set_pre_patch_ret $MOD_LIVEPATCH -19 +load_failing_mod $MOD_TARGET +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': patching complete +% echo -19 > /sys/module/$MOD_LIVEPATCH/parameters/pre_patch_ret +% modprobe $MOD_TARGET +livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET' +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +livepatch: pre-patch callback failed for object '$MOD_TARGET' +livepatch: patch '$MOD_LIVEPATCH' failed for module '$MOD_TARGET', refusing to load module '$MOD_TARGET' +modprobe: ERROR: could not insert '$MOD_TARGET': No such device +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +# TEST: multiple target modules +# +# Test loading multiple targeted kernel modules. This test-case is +# mainly for comparing with the next test-case. +# +# - Load a target "busy" kernel module which kicks off a worker function +# that immediately exits. +# +# - Proceed with loading the livepatch and another ordinary target +# module. Post-patch callbacks are executed and the transition +# completes quickly. + +echo -n "TEST: multiple target modules ... " +dmesg -C + +load_mod $MOD_TARGET_BUSY sleep_secs=0 +# give $MOD_TARGET_BUSY::busymod_work_func() a chance to run +sleep 5 +load_lp $MOD_LIVEPATCH +load_mod $MOD_TARGET +unload_mod $MOD_TARGET +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH +unload_mod $MOD_TARGET_BUSY + +check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=0 +$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init +$MOD_TARGET_BUSY: busymod_work_func, sleeping 0 seconds ... +$MOD_TARGET_BUSY: busymod_work_func exit +% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': patching complete +% modprobe $MOD_TARGET +livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET' +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +$MOD_TARGET: ${MOD_TARGET}_init +% rmmod $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_exit +$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away +livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH +% rmmod $MOD_TARGET_BUSY +$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit" + + + +# TEST: busy target module +# +# A similar test as the previous one, but force the "busy" kernel module +# to do longer work. +# +# The livepatching core will refuse to patch a task that is currently +# executing a to-be-patched function -- the consistency model stalls the +# current patch transition until this safety-check is met. Test a +# scenario where one of a livepatch's target klp_objects sits on such a +# function for a long time. Meanwhile, load and unload other target +# kernel modules while the livepatch transition is in progress. +# +# - Load the "busy" kernel module, this time make it do 10 seconds worth +# of work. +# +# - Meanwhile, the livepatch is loaded. Notice that the patch +# transition does not complete as the targeted "busy" module is +# sitting on a to-be-patched function. +# +# - Load a second target module (this one is an ordinary idle kernel +# module). Note that *no* post-patch callbacks will be executed while +# the livepatch is still in transition. +# +# - Request an unload of the simple kernel module. The patch is still +# transitioning, so its pre-unpatch callbacks are skipped. +# +# - Finally the livepatch is disabled. Since none of the patch's +# klp_object's post-patch callbacks executed, the remaining +# klp_object's pre-unpatch callbacks are skipped. + +echo -n "TEST: busy target module ... " +dmesg -C + +load_mod $MOD_TARGET_BUSY sleep_secs=10 +load_lp_nowait $MOD_LIVEPATCH +# Don't wait for transition, load $MOD_TARGET while the transition +# is still stalled in $MOD_TARGET_BUSY::busymod_work_func() +sleep 5 +load_mod $MOD_TARGET +unload_mod $MOD_TARGET +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH +unload_mod $MOD_TARGET_BUSY + +check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=10 +$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init +$MOD_TARGET_BUSY: busymod_work_func, sleeping 10 seconds ... +% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': starting patching transition +% modprobe $MOD_TARGET +livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET' +$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init +$MOD_TARGET: ${MOD_TARGET}_init +% rmmod $MOD_TARGET +$MOD_TARGET: ${MOD_TARGET}_exit +livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': reversing transition from patching to unpatching +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH +% rmmod $MOD_TARGET_BUSY +$MOD_TARGET_BUSY: busymod_work_func exit +$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit" + + +# TEST: multiple livepatches +# +# Test loading multiple livepatches. This test-case is mainly for comparing +# with the next test-case. +# +# - Load and unload two livepatches, pre and post (un)patch callbacks +# execute as each patch progresses through its (un)patching +# transition. + +echo -n "TEST: multiple livepatches ... " +dmesg -C + +load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH2 +disable_lp $MOD_LIVEPATCH2 +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': patching complete +% modprobe $MOD_LIVEPATCH2 +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH2 +% rmmod $MOD_LIVEPATCH" + + +# TEST: atomic replace +# +# Load multiple livepatches, but the second as an 'atomic-replace' +# patch. When the latter loads, the original livepatch should be +# disabled and *none* of its pre/post-unpatch callbacks executed. On +# the other hand, when the atomic-replace livepatch is disabled, its +# pre/post-unpatch callbacks *should* be executed. +# +# - Load and unload two livepatches, the second of which has its +# .replace flag set true. +# +# - Pre and post patch callbacks are executed for both livepatches. +# +# - Once the atomic replace module is loaded, only its pre and post +# unpatch callbacks are executed. + +echo -n "TEST: atomic replace ... " +dmesg -C + +load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH2 replace=1 +disable_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH': patching complete +% modprobe $MOD_LIVEPATCH2 replace=1 +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% rmmod $MOD_LIVEPATCH2 +% rmmod $MOD_LIVEPATCH" + + +exit 0 diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh new file mode 100755 index 000000000000..f05268aea859 --- /dev/null +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +. $(dirname $0)/functions.sh + +MOD_LIVEPATCH=test_klp_livepatch +MOD_REPLACE=test_klp_atomic_replace + +set_dynamic_debug + + +# TEST: basic function patching +# - load a livepatch that modifies the output from /proc/cmdline and +# verify correct behavior +# - unload the livepatch and make sure the patch was removed + +echo -n "TEST: basic function patching ... " +dmesg -C + +load_lp $MOD_LIVEPATCH + +if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then + echo -e "FAIL\n\n" + die "livepatch kselftest(s) failed" +fi + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then + echo -e "FAIL\n\n" + die "livepatch kselftest(s) failed" +fi + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +# TEST: multiple livepatches +# - load a livepatch that modifies the output from /proc/cmdline and +# verify correct behavior +# - load another livepatch and verify that both livepatches are active +# - unload the second livepatch and verify that the first is still active +# - unload the first livepatch and verify none are active + +echo -n "TEST: multiple livepatches ... " +dmesg -C + +load_lp $MOD_LIVEPATCH + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +load_lp $MOD_REPLACE replace=0 + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +disable_lp $MOD_REPLACE +unload_lp $MOD_REPLACE + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +$MOD_LIVEPATCH: this has been live patched +% modprobe $MOD_REPLACE replace=0 +livepatch: enabling patch '$MOD_REPLACE' +livepatch: '$MOD_REPLACE': initializing patching transition +livepatch: '$MOD_REPLACE': starting patching transition +livepatch: '$MOD_REPLACE': completing patching transition +livepatch: '$MOD_REPLACE': patching complete +$MOD_LIVEPATCH: this has been live patched +$MOD_REPLACE: this has been live patched +% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled +livepatch: '$MOD_REPLACE': initializing unpatching transition +livepatch: '$MOD_REPLACE': starting unpatching transition +livepatch: '$MOD_REPLACE': completing unpatching transition +livepatch: '$MOD_REPLACE': unpatching complete +% rmmod $MOD_REPLACE +$MOD_LIVEPATCH: this has been live patched +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +# TEST: atomic replace livepatch +# - load a livepatch that modifies the output from /proc/cmdline and +# verify correct behavior +# - load an atomic replace livepatch and verify that only the second is active +# - remove the first livepatch and verify that the atomic replace livepatch +# is still active +# - remove the atomic replace livepatch and verify that none are active + +echo -n "TEST: atomic replace livepatch ... " +dmesg -C + +load_lp $MOD_LIVEPATCH + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +load_lp $MOD_REPLACE replace=1 + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +unload_lp $MOD_LIVEPATCH + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +disable_lp $MOD_REPLACE +unload_lp $MOD_REPLACE + +grep 'live patched' /proc/cmdline > /dev/kmsg +grep 'live patched' /proc/meminfo > /dev/kmsg + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +$MOD_LIVEPATCH: this has been live patched +% modprobe $MOD_REPLACE replace=1 +livepatch: enabling patch '$MOD_REPLACE' +livepatch: '$MOD_REPLACE': initializing patching transition +livepatch: '$MOD_REPLACE': starting patching transition +livepatch: '$MOD_REPLACE': completing patching transition +livepatch: '$MOD_REPLACE': patching complete +$MOD_REPLACE: this has been live patched +% rmmod $MOD_LIVEPATCH +$MOD_REPLACE: this has been live patched +% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled +livepatch: '$MOD_REPLACE': initializing unpatching transition +livepatch: '$MOD_REPLACE': starting unpatching transition +livepatch: '$MOD_REPLACE': completing unpatching transition +livepatch: '$MOD_REPLACE': unpatching complete +% rmmod $MOD_REPLACE" + + +exit 0 diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh new file mode 100755 index 000000000000..04a37831e204 --- /dev/null +++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +. $(dirname $0)/functions.sh + +MOD_TEST=test_klp_shadow_vars + +set_dynamic_debug + + +# TEST: basic shadow variable API +# - load a module that exercises the shadow variable API + +echo -n "TEST: basic shadow variable API ... " +dmesg -C + +load_mod $MOD_TEST +unload_mod $MOD_TEST + +check_result "% modprobe $MOD_TEST +$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0 +$MOD_TEST: got expected NULL result +$MOD_TEST: shadow_ctor: PTR6 -> PTR1 +$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR1 = PTR6 +$MOD_TEST: shadow_ctor: PTR8 -> PTR2 +$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR2 = PTR8 +$MOD_TEST: shadow_ctor: PTR10 -> PTR3 +$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR3 = PTR10 +$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR6 +$MOD_TEST: got expected PTR6 -> PTR1 result +$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR8 +$MOD_TEST: got expected PTR8 -> PTR2 result +$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10 +$MOD_TEST: got expected PTR10 -> PTR3 result +$MOD_TEST: shadow_ctor: PTR11 -> PTR4 +$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11 +$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11 +$MOD_TEST: got expected PTR11 -> PTR4 result +$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR6) +$MOD_TEST: klp_shadow_free(obj=PTR5, id=0x1234, dtor=PTR13) +$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0 +$MOD_TEST: got expected NULL result +$MOD_TEST: shadow_dtor(obj=PTR9, shadow_data=PTR8) +$MOD_TEST: klp_shadow_free(obj=PTR9, id=0x1234, dtor=PTR13) +$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR0 +$MOD_TEST: got expected NULL result +$MOD_TEST: shadow_dtor(obj=PTR12, shadow_data=PTR11) +$MOD_TEST: klp_shadow_free(obj=PTR12, id=0x1234, dtor=PTR13) +$MOD_TEST: klp_shadow_get(obj=PTR12, id=0x1234) = PTR0 +$MOD_TEST: got expected NULL result +$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10 +$MOD_TEST: got expected PTR10 -> PTR3 result +$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR10) +$MOD_TEST: klp_shadow_free_all(id=0x1235, dtor=PTR13) +$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0 +$MOD_TEST: shadow_get() got expected NULL result +% rmmod test_klp_shadow_vars" + +exit 0 diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 10baa1652fc2..c67d32eeb668 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -54,6 +54,22 @@ static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) return fd; } +static int mfd_assert_reopen_fd(int fd_in) +{ + int r, fd; + char path[100]; + + sprintf(path, "/proc/self/fd/%d", fd_in); + + fd = open(path, O_RDWR); + if (fd < 0) { + printf("re-open of existing fd %d failed\n", fd_in); + abort(); + } + + return fd; +} + static void mfd_fail_new(const char *name, unsigned int flags) { int r; @@ -255,6 +271,25 @@ static void mfd_assert_read(int fd) munmap(p, mfd_def_size); } +/* Test that PROT_READ + MAP_SHARED mappings work. */ +static void mfd_assert_read_shared(int fd) +{ + void *p; + + /* verify PROT_READ and MAP_SHARED *is* allowed */ + p = mmap(NULL, + mfd_def_size, + PROT_READ, + MAP_SHARED, + fd, + 0); + if (p == MAP_FAILED) { + printf("mmap() failed: %m\n"); + abort(); + } + munmap(p, mfd_def_size); +} + static void mfd_assert_write(int fd) { ssize_t l; @@ -693,6 +728,44 @@ static void test_seal_write(void) } /* + * Test SEAL_FUTURE_WRITE + * Test whether SEAL_FUTURE_WRITE actually prevents modifications. + */ +static void test_seal_future_write(void) +{ + int fd, fd2; + void *p; + + printf("%s SEAL-FUTURE-WRITE\n", memfd_str); + + fd = mfd_assert_new("kern_memfd_seal_future_write", + mfd_def_size, + MFD_CLOEXEC | MFD_ALLOW_SEALING); + + p = mfd_assert_mmap_shared(fd); + + mfd_assert_has_seals(fd, 0); + + mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE); + mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE); + + /* read should pass, writes should fail */ + mfd_assert_read(fd); + mfd_assert_read_shared(fd); + mfd_fail_write(fd); + + fd2 = mfd_assert_reopen_fd(fd); + /* read should pass, writes should still fail */ + mfd_assert_read(fd2); + mfd_assert_read_shared(fd2); + mfd_fail_write(fd2); + + munmap(p, mfd_def_size); + close(fd2); + close(fd); +} + +/* * Test SEAL_SHRINK * Test whether SEAL_SHRINK actually prevents shrinking */ @@ -945,6 +1018,7 @@ int main(int argc, char **argv) test_basic(); test_seal_write(); + test_seal_future_write(); test_seal_shrink(); test_seal_grow(); test_seal_resize(); diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c index ecc14d68e101..908de689a902 100644 --- a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c +++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c @@ -25,7 +25,7 @@ unsigned long long clock_frequency; unsigned long long timebase_frequency; double timebase_multiplier; -static inline unsigned long long mftb(void) +static inline unsigned long mftb(void) { unsigned long low; diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 52b4710469d2..96043b9b9829 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -77,6 +77,14 @@ #define TEXASR_TE 0x0000000004000000 #define TEXASR_ROT 0x0000000002000000 +/* MSR register bits */ +#define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ + +#define __MASK(X) (1UL<<(X)) + +/* macro to check TM MSR bits */ +#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */ + /* Vector Instructions */ #define VSX_XX1(xs, ra, rb) (((xs) & 0x1f) << 21 | ((ra) << 16) | \ ((rb) << 11) | (((xs) >> 5))) diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index ae43a614835d..7636bf45d5d5 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -102,8 +102,10 @@ do { \ #if defined(__powerpc64__) #define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] +#define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR] #elif defined(__powerpc__) #define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP] +#define UCONTEXT_MSR(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_MSR] #else #error implement UCONTEXT_NIA #endif diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c index 167135bd92a8..af1b80265076 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c @@ -11,7 +11,6 @@ #include <sys/wait.h> #include <unistd.h> #include <setjmp.h> -#include <signal.h> #include "ebb.h" diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 208452a93e2c..951fe855f7cd 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -11,6 +11,7 @@ tm-signal-context-chk-fpu tm-signal-context-chk-gpr tm-signal-context-chk-vmx tm-signal-context-chk-vsx +tm-signal-context-force-tm tm-signal-sigreturn-nt tm-vmx-unavail tm-unavailable diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 75a685359129..c0734ed0ef56 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -4,7 +4,8 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ - $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt + $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ + tm-signal-context-force-tm top_srcdir = ../../../../.. include ../../lib.mk @@ -20,6 +21,7 @@ $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 $(OUTPUT)/tm-resched-dscr: ../pmu/lib.c $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 +$(OUTPUT)/tm-signal-context-force-tm: CFLAGS += -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c new file mode 100644 index 000000000000..31717625f318 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018, Breno Leitao, Gustavo Romero, IBM Corp. + * + * This test raises a SIGUSR1 signal, and toggle the MSR[TS] + * fields at the signal handler. With MSR[TS] being set, the kernel will + * force a recheckpoint, which may cause a segfault when returning to + * user space. Since the test needs to re-run, the segfault needs to be + * caught and handled. + * + * In order to continue the test even after a segfault, the context is + * saved prior to the signal being raised, and it is restored when there is + * a segmentation fault. This happens for COUNT_MAX times. + * + * This test never fails (as returning EXIT_FAILURE). It either succeeds, + * or crash the kernel (on a buggy kernel). + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> +#include <sys/mman.h> + +#include "tm.h" +#include "utils.h" +#include "reg.h" + +#define COUNT_MAX 5000 /* Number of interactions */ + +/* + * This test only runs on 64 bits system. Unsetting MSR_TS_S to avoid + * compilation issue on 32 bits system. There is no side effect, since the + * whole test will be skipped if it is not running on 64 bits system. + */ +#ifndef __powerpc64__ +#undef MSR_TS_S +#define MSR_TS_S 0 +#endif + +/* Setting contexts because the test will crash and we want to recover */ +ucontext_t init_context, main_context; + +static int count, first_time; + +void usr_signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = uc; + int ret; + + /* + * Allocating memory in a signal handler, and never freeing it on + * purpose, forcing the heap increase, so, the memory leak is what + * we want here. + */ + ucp->uc_link = mmap(NULL, sizeof(ucontext_t), + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (ucp->uc_link == (void *)-1) { + perror("Mmap failed"); + exit(-1); + } + + /* Forcing the page to be allocated in a page fault */ + ret = madvise(ucp->uc_link, sizeof(ucontext_t), MADV_DONTNEED); + if (ret) { + perror("madvise failed"); + exit(-1); + } + + memcpy(&ucp->uc_link->uc_mcontext, &ucp->uc_mcontext, + sizeof(ucp->uc_mcontext)); + + /* Forcing to enable MSR[TM] */ + UCONTEXT_MSR(ucp) |= MSR_TS_S; + + /* + * A fork inside a signal handler seems to be more efficient than a + * fork() prior to the signal being raised. + */ + if (fork() == 0) { + /* + * Both child and parent will return, but, child returns + * with count set so it will exit in the next segfault. + * Parent will continue to loop. + */ + count = COUNT_MAX; + } + + /* + * If the change above does not hit the bug, it will cause a + * segmentation fault, since the ck structures are NULL. + */ +} + +void seg_signal_handler(int signo, siginfo_t *si, void *uc) +{ + if (count == COUNT_MAX) { + /* Return to tm_signal_force_msr() and exit */ + setcontext(&main_context); + } + + count++; + + /* Reexecute the test */ + setcontext(&init_context); +} + +void tm_trap_test(void) +{ + struct sigaction usr_sa, seg_sa; + stack_t ss; + + usr_sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + usr_sa.sa_sigaction = usr_signal_handler; + + seg_sa.sa_flags = SA_SIGINFO; + seg_sa.sa_sigaction = seg_signal_handler; + + /* + * Set initial context. Will get back here from + * seg_signal_handler() + */ + getcontext(&init_context); + + /* Allocated an alternative signal stack area */ + ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; + + if (ss.ss_sp == (void *)-1) { + perror("mmap error\n"); + exit(-1); + } + + /* Force the allocation through a page fault */ + if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) { + perror("madvise\n"); + exit(-1); + } + + /* Setting an alternative stack to generate a page fault when + * the signal is raised. + */ + if (sigaltstack(&ss, NULL)) { + perror("sigaltstack\n"); + exit(-1); + } + + /* The signal handler will enable MSR_TS */ + sigaction(SIGUSR1, &usr_sa, NULL); + /* If it does not crash, it will segfault, avoid it to retest */ + sigaction(SIGSEGV, &seg_sa, NULL); + + raise(SIGUSR1); +} + +int tm_signal_context_force_tm(void) +{ + SKIP_IF(!have_htm()); + /* + * Skipping if not running on 64 bits system, since I think it is + * not possible to set mcontext's [MSR] with TS, due to it being 32 + * bits. + */ + SKIP_IF(!is_ppc64le()); + + /* Will get back here after COUNT_MAX interactions */ + getcontext(&main_context); + + if (!first_time++) + tm_trap_test(); + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); +} diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 29bac5ef9a93..444ad39d3700 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -2,6 +2,7 @@ /fd-002-posix-eq /fd-003-kthread /proc-loadavg-001 +/proc-pid-vm /proc-self-map-files-001 /proc-self-map-files-002 /proc-self-syscall diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 434d033ee067..5163dc887aa3 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS += fd-001-lookup TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 TEST_GEN_PROGS += proc-self-syscall diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index fcff7047000d..471e2aa28077 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -30,7 +30,7 @@ int main(void) if (unshare(CLONE_NEWPID) == -1) { if (errno == ENOSYS || errno == EPERM) - return 2; + return 4; return 1; } diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c new file mode 100644 index 000000000000..bbe8150d18aa --- /dev/null +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Fork and exec tiny 1 page executable which precisely controls its VM. + * Test /proc/$PID/maps + * Test /proc/$PID/smaps + * Test /proc/$PID/smaps_rollup + * Test /proc/$PID/statm + * + * FIXME require CONFIG_TMPFS which can be disabled + * FIXME test other values from "smaps" + * FIXME support other archs + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <linux/kdev_t.h> + +static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) +{ + return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); +} + +static void make_private_tmp(void) +{ + if (unshare(CLONE_NEWNS) == -1) { + if (errno == ENOSYS || errno == EPERM) { + exit(4); + } + exit(1); + } + if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { + exit(1); + } + if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) { + exit(1); + } +} + +static pid_t pid = -1; +static void ate(void) +{ + if (pid > 0) { + kill(pid, SIGTERM); + } +} + +struct elf64_hdr { + uint8_t e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +}; + +struct elf64_phdr { + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; +}; + +#ifdef __x86_64__ +#define PAGE_SIZE 4096 +#define VADDR (1UL << 32) +#define MAPS_OFFSET 73 + +#define syscall 0x0f, 0x05 +#define mov_rdi(x) \ + 0x48, 0xbf, \ + (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ + ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff + +#define mov_rsi(x) \ + 0x48, 0xbe, \ + (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ + ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff + +#define mov_eax(x) \ + 0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff + +static const uint8_t payload[] = { + /* Casually unmap stack, vDSO and everything else. */ + /* munmap */ + mov_rdi(VADDR + 4096), + mov_rsi((1ULL << 47) - 4096 - VADDR - 4096), + mov_eax(11), + syscall, + + /* Ping parent. */ + /* write(0, &c, 1); */ + 0x31, 0xff, /* xor edi, edi */ + 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */ + 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */ + mov_eax(1), + syscall, + + /* 1: pause(); */ + mov_eax(34), + syscall, + + 0xeb, 0xf7, /* jmp 1b */ +}; + +static int make_exe(const uint8_t *payload, size_t len) +{ + struct elf64_hdr h; + struct elf64_phdr ph; + + struct iovec iov[3] = { + {&h, sizeof(struct elf64_hdr)}, + {&ph, sizeof(struct elf64_phdr)}, + {(void *)payload, len}, + }; + int fd, fd1; + char buf[64]; + + memset(&h, 0, sizeof(h)); + h.e_ident[0] = 0x7f; + h.e_ident[1] = 'E'; + h.e_ident[2] = 'L'; + h.e_ident[3] = 'F'; + h.e_ident[4] = 2; + h.e_ident[5] = 1; + h.e_ident[6] = 1; + h.e_ident[7] = 0; + h.e_type = 2; + h.e_machine = 0x3e; + h.e_version = 1; + h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr); + h.e_phoff = sizeof(struct elf64_hdr); + h.e_shoff = 0; + h.e_flags = 0; + h.e_ehsize = sizeof(struct elf64_hdr); + h.e_phentsize = sizeof(struct elf64_phdr); + h.e_phnum = 1; + h.e_shentsize = 0; + h.e_shnum = 0; + h.e_shstrndx = 0; + + memset(&ph, 0, sizeof(ph)); + ph.p_type = 1; + ph.p_flags = (1<<2)|1; + ph.p_offset = 0; + ph.p_vaddr = VADDR; + ph.p_paddr = 0; + ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); + ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); + ph.p_align = 4096; + + fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); + if (fd == -1) { + exit(1); + } + + if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) { + exit(1); + } + + /* Avoid ETXTBSY on exec. */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); + fd1 = open(buf, O_RDONLY|O_CLOEXEC); + close(fd); + + return fd1; +} +#endif + +#ifdef __x86_64__ +int main(void) +{ + int pipefd[2]; + int exec_fd; + + atexit(ate); + + make_private_tmp(); + + /* Reserve fd 0 for 1-byte pipe ping from child. */ + close(0); + if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) { + return 1; + } + + exec_fd = make_exe(payload, sizeof(payload)); + + if (pipe(pipefd) == -1) { + return 1; + } + if (dup2(pipefd[1], 0) != 0) { + return 1; + } + + pid = fork(); + if (pid == -1) { + return 1; + } + if (pid == 0) { + sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH); + return 1; + } + + char _; + if (read(pipefd[0], &_, 1) != 1) { + return 1; + } + + struct stat st; + if (fstat(exec_fd, &st) == -1) { + return 1; + } + + /* Generate "head -n1 /proc/$PID/maps" */ + char buf0[256]; + memset(buf0, ' ', sizeof(buf0)); + int len = snprintf(buf0, sizeof(buf0), + "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu", + VADDR, VADDR + PAGE_SIZE, + MAJOR(st.st_dev), MINOR(st.st_dev), + (unsigned long long)st.st_ino); + buf0[len] = ' '; + snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET, + "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino); + + + /* Test /proc/$PID/maps */ + { + char buf[256]; + ssize_t rv; + int fd; + + snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); + fd = open(buf, O_RDONLY); + if (fd == -1) { + return 1; + } + rv = read(fd, buf, sizeof(buf)); + assert(rv == strlen(buf0)); + assert(memcmp(buf, buf0, strlen(buf0)) == 0); + } + + /* Test /proc/$PID/smaps */ + { + char buf[1024]; + ssize_t rv; + int fd; + + snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); + fd = open(buf, O_RDONLY); + if (fd == -1) { + return 1; + } + rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv <= sizeof(buf)); + + assert(rv >= strlen(buf0)); + assert(memcmp(buf, buf0, strlen(buf0)) == 0); + +#define RSS1 "Rss: 4 kB\n" +#define RSS2 "Rss: 0 kB\n" +#define PSS1 "Pss: 4 kB\n" +#define PSS2 "Pss: 0 kB\n" + assert(memmem(buf, rv, RSS1, strlen(RSS1)) || + memmem(buf, rv, RSS2, strlen(RSS2))); + assert(memmem(buf, rv, PSS1, strlen(PSS1)) || + memmem(buf, rv, PSS2, strlen(PSS2))); + + static const char *S[] = { + "Size: 4 kB\n", + "KernelPageSize: 4 kB\n", + "MMUPageSize: 4 kB\n", + "Anonymous: 0 kB\n", + "AnonHugePages: 0 kB\n", + "Shared_Hugetlb: 0 kB\n", + "Private_Hugetlb: 0 kB\n", + "Locked: 0 kB\n", + }; + int i; + + for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + assert(memmem(buf, rv, S[i], strlen(S[i]))); + } + } + + /* Test /proc/$PID/smaps_rollup */ + { + char bufr[256]; + memset(bufr, ' ', sizeof(bufr)); + len = snprintf(bufr, sizeof(bufr), + "%08lx-%08lx ---p 00000000 00:00 0", + VADDR, VADDR + PAGE_SIZE); + bufr[len] = ' '; + snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET, + "[rollup]\n"); + + char buf[1024]; + ssize_t rv; + int fd; + + snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); + fd = open(buf, O_RDONLY); + if (fd == -1) { + return 1; + } + rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv <= sizeof(buf)); + + assert(rv >= strlen(bufr)); + assert(memcmp(buf, bufr, strlen(bufr)) == 0); + + assert(memmem(buf, rv, RSS1, strlen(RSS1)) || + memmem(buf, rv, RSS2, strlen(RSS2))); + assert(memmem(buf, rv, PSS1, strlen(PSS1)) || + memmem(buf, rv, PSS2, strlen(PSS2))); + + static const char *S[] = { + "Anonymous: 0 kB\n", + "AnonHugePages: 0 kB\n", + "Shared_Hugetlb: 0 kB\n", + "Private_Hugetlb: 0 kB\n", + "Locked: 0 kB\n", + }; + int i; + + for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + assert(memmem(buf, rv, S[i], strlen(S[i]))); + } + } + + /* Test /proc/$PID/statm */ + { + char buf[64]; + ssize_t rv; + int fd; + + snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); + fd = open(buf, O_RDONLY); + if (fd == -1) { + return 1; + } + rv = read(fd, buf, sizeof(buf)); + assert(rv == 7 * 2); + + assert(buf[0] == '1'); /* ->total_vm */ + assert(buf[1] == ' '); + assert(buf[2] == '0' || buf[2] == '1'); /* rss */ + assert(buf[3] == ' '); + assert(buf[4] == '0' || buf[2] == '1'); /* file rss */ + assert(buf[5] == ' '); + assert(buf[6] == '1'); /* ELF executable segments */ + assert(buf[7] == ' '); + assert(buf[8] == '0'); + assert(buf[9] == ' '); + assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */ + assert(buf[11] == ' '); + assert(buf[12] == '0'); + assert(buf[13] == '\n'); + } + + return 0; +} +#else +int main(void) +{ + return 4; +} +#endif diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index 85744425b08d..762cb01f2ca7 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -63,7 +63,7 @@ int main(void) p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); if (p == MAP_FAILED) { if (errno == EPERM) - return 2; + return 4; return 1; } diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 5ab5f4810e43..9f6d000c0245 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -20,7 +20,6 @@ #include <sys/stat.h> #include <fcntl.h> #include <errno.h> -#include <unistd.h> #include <string.h> #include <stdio.h> @@ -39,7 +38,7 @@ int main(void) fd = open("/proc/self/syscall", O_RDONLY); if (fd == -1) { if (errno == ENOENT) - return 2; + return 4; return 1; } diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c index a38b2fbaa7ad..b467b98a457d 100644 --- a/tools/testing/selftests/proc/proc-self-wchan.c +++ b/tools/testing/selftests/proc/proc-self-wchan.c @@ -27,7 +27,7 @@ int main(void) fd = open("/proc/self/wchan", O_RDONLY); if (fd == -1) { if (errno == ENOENT) - return 2; + return 4; return 1; } diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c index 563e752e6eba..b3ef9e14d6cc 100644 --- a/tools/testing/selftests/proc/read.c +++ b/tools/testing/selftests/proc/read.c @@ -26,8 +26,10 @@ #include <dirent.h> #include <stdbool.h> #include <stdlib.h> +#include <stdio.h> #include <string.h> #include <sys/stat.h> +#include <sys/vfs.h> #include <fcntl.h> #include <unistd.h> @@ -123,10 +125,22 @@ static void f(DIR *d, unsigned int level) int main(void) { DIR *d; + struct statfs sfs; d = opendir("/proc"); if (!d) + return 4; + + /* Ensure /proc is proc. */ + if (fstatfs(dirfd(d), &sfs) == -1) { + return 1; + } + if (sfs.f_type != 0x9fa0) { + fprintf(stderr, "error: unexpected f_type %lx\n", (long)sfs.f_type); return 2; + } + f(d, 0); + return 0; } diff --git a/tools/testing/selftests/safesetid/.gitignore b/tools/testing/selftests/safesetid/.gitignore new file mode 100644 index 000000000000..9c1a629bca01 --- /dev/null +++ b/tools/testing/selftests/safesetid/.gitignore @@ -0,0 +1 @@ +safesetid-test diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile new file mode 100644 index 000000000000..98da7a504737 --- /dev/null +++ b/tools/testing/selftests/safesetid/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for mount selftests. +CFLAGS = -Wall -lcap -O2 + +TEST_PROGS := run_tests.sh +TEST_GEN_FILES := safesetid-test + +include ../lib.mk diff --git a/tools/testing/selftests/safesetid/config b/tools/testing/selftests/safesetid/config new file mode 100644 index 000000000000..9d44e5c2e096 --- /dev/null +++ b/tools/testing/selftests/safesetid/config @@ -0,0 +1,2 @@ +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c new file mode 100644 index 000000000000..892c8e8b1b8b --- /dev/null +++ b/tools/testing/selftests/safesetid/safesetid-test.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <pwd.h> +#include <string.h> +#include <syscall.h> +#include <sys/capability.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdarg.h> + +#ifndef CLONE_NEWUSER +# define CLONE_NEWUSER 0x10000000 +#endif + +#define ROOT_USER 0 +#define RESTRICTED_PARENT 1 +#define ALLOWED_CHILD1 2 +#define ALLOWED_CHILD2 3 +#define NO_POLICY_USER 4 + +char* add_whitelist_policy_file = "/sys/kernel/security/safesetid/add_whitelist_policy"; + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static bool vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) +{ + char buf[4096]; + int fd; + ssize_t written; + int buf_len; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) { + printf("vsnprintf failed: %s\n", + strerror(errno)); + return false; + } + if (buf_len >= sizeof(buf)) { + printf("vsnprintf output truncated\n"); + return false; + } + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return true; + return false; + } + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + printf("short write to %s\n", filename); + return false; + } else { + printf("write to %s failed: %s\n", + filename, strerror(errno)); + return false; + } + } + if (close(fd) != 0) { + printf("close of %s failed: %s\n", + filename, strerror(errno)); + return false; + } + return true; +} + +static bool write_file(char *filename, char *fmt, ...) +{ + va_list ap; + bool ret; + + va_start(ap, fmt); + ret = vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); + + return ret; +} + +static void ensure_user_exists(uid_t uid) +{ + struct passwd p; + + FILE *fd; + char name_str[10]; + + if (getpwuid(uid) == NULL) { + memset(&p,0x00,sizeof(p)); + fd=fopen("/etc/passwd","a"); + if (fd == NULL) + die("couldn't open file\n"); + if (fseek(fd, 0, SEEK_END)) + die("couldn't fseek\n"); + snprintf(name_str, 10, "%d", uid); + p.pw_name=name_str; + p.pw_uid=uid; + p.pw_gecos="Test account"; + p.pw_dir="/dev/null"; + p.pw_shell="/bin/false"; + int value = putpwent(&p,fd); + if (value != 0) + die("putpwent failed\n"); + if (fclose(fd)) + die("fclose failed\n"); + } +} + +static void ensure_securityfs_mounted(void) +{ + int fd = open(add_whitelist_policy_file, O_WRONLY); + if (fd < 0) { + if (errno == ENOENT) { + // Need to mount securityfs + if (mount("securityfs", "/sys/kernel/security", + "securityfs", 0, NULL) < 0) + die("mounting securityfs failed\n"); + } else { + die("couldn't find securityfs for unknown reason\n"); + } + } else { + if (close(fd) != 0) { + die("close of %s failed: %s\n", + add_whitelist_policy_file, strerror(errno)); + } + } +} + +static void write_policies(void) +{ + ssize_t written; + int fd; + + fd = open(add_whitelist_policy_file, O_WRONLY); + if (fd < 0) + die("cant open add_whitelist_policy file\n"); + written = write(fd, "1:2", strlen("1:2")); + if (written != strlen("1:2")) { + if (written >= 0) { + die("short write to %s\n", add_whitelist_policy_file); + } else { + die("write to %s failed: %s\n", + add_whitelist_policy_file, strerror(errno)); + } + } + written = write(fd, "1:3", strlen("1:3")); + if (written != strlen("1:3")) { + if (written >= 0) { + die("short write to %s\n", add_whitelist_policy_file); + } else { + die("write to %s failed: %s\n", + add_whitelist_policy_file, strerror(errno)); + } + } + if (close(fd) != 0) { + die("close of %s failed: %s\n", + add_whitelist_policy_file, strerror(errno)); + } +} + +static bool test_userns(bool expect_success) +{ + uid_t uid; + char map_file_name[32]; + size_t sz = sizeof(map_file_name); + pid_t cpid; + bool success; + + uid = getuid(); + + int clone_flags = CLONE_NEWUSER; + cpid = syscall(SYS_clone, clone_flags, NULL); + if (cpid == -1) { + printf("clone failed"); + return false; + } + + if (cpid == 0) { /* Code executed by child */ + // Give parent 1 second to write map file + sleep(1); + exit(EXIT_SUCCESS); + } else { /* Code executed by parent */ + if(snprintf(map_file_name, sz, "/proc/%d/uid_map", cpid) < 0) { + printf("preparing file name string failed"); + return false; + } + success = write_file(map_file_name, "0 0 1", uid); + return success == expect_success; + } + + printf("should not reach here"); + return false; +} + +static void test_setuid(uid_t child_uid, bool expect_success) +{ + pid_t cpid, w; + int wstatus; + + cpid = fork(); + if (cpid == -1) { + die("fork\n"); + } + + if (cpid == 0) { /* Code executed by child */ + setuid(child_uid); + if (getuid() == child_uid) + exit(EXIT_SUCCESS); + else + exit(EXIT_FAILURE); + } else { /* Code executed by parent */ + do { + w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED); + if (w == -1) { + die("waitpid\n"); + } + + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) { + if (expect_success) { + return; + } else { + die("unexpected success\n"); + } + } else { + if (expect_success) { + die("unexpected failure\n"); + } else { + return; + } + } + } else if (WIFSIGNALED(wstatus)) { + if (WTERMSIG(wstatus) == 9) { + if (expect_success) + die("killed unexpectedly\n"); + else + return; + } else { + die("unexpected signal: %d\n", wstatus); + } + } else { + die("unexpected status: %d\n", wstatus); + } + } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus)); + } + + die("should not reach here\n"); +} + +static void ensure_users_exist(void) +{ + ensure_user_exists(ROOT_USER); + ensure_user_exists(RESTRICTED_PARENT); + ensure_user_exists(ALLOWED_CHILD1); + ensure_user_exists(ALLOWED_CHILD2); + ensure_user_exists(NO_POLICY_USER); +} + +static void drop_caps(bool setid_retained) +{ + cap_value_t cap_values[] = {CAP_SETUID, CAP_SETGID}; + cap_t caps; + + caps = cap_get_proc(); + if (setid_retained) + cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_values, CAP_SET); + else + cap_clear(caps); + cap_set_proc(caps); + cap_free(caps); +} + +int main(int argc, char **argv) +{ + ensure_users_exist(); + ensure_securityfs_mounted(); + write_policies(); + + if (prctl(PR_SET_KEEPCAPS, 1L)) + die("Error with set keepcaps\n"); + + // First test to make sure we can write userns mappings from a user + // that doesn't have any restrictions (as long as it has CAP_SETUID); + setuid(NO_POLICY_USER); + setgid(NO_POLICY_USER); + + // Take away all but setid caps + drop_caps(true); + + // Need PR_SET_DUMPABLE flag set so we can write /proc/[pid]/uid_map + // from non-root parent process. + if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0)) + die("Error with set dumpable\n"); + + if (!test_userns(true)) { + die("test_userns failed when it should work\n"); + } + + setuid(RESTRICTED_PARENT); + setgid(RESTRICTED_PARENT); + + test_setuid(ROOT_USER, false); + test_setuid(ALLOWED_CHILD1, true); + test_setuid(ALLOWED_CHILD2, true); + test_setuid(NO_POLICY_USER, false); + + if (!test_userns(false)) { + die("test_userns worked when it should fail\n"); + } + + // Now take away all caps + drop_caps(false); + test_setuid(2, false); + test_setuid(3, false); + test_setuid(4, false); + + // NOTE: this test doesn't clean up users that were created in + // /etc/passwd or flush policies that were added to the LSM. + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/safesetid/safesetid-test.sh b/tools/testing/selftests/safesetid/safesetid-test.sh new file mode 100755 index 000000000000..e4fdce675c54 --- /dev/null +++ b/tools/testing/selftests/safesetid/safesetid-test.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +TCID="safesetid-test.sh" +errcode=0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +check_root() +{ + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo $TCID: must be run as root >&2 + exit $ksft_skip + fi +} + +main_function() +{ + check_root + ./safesetid-test +} + +main_function +echo "$TCID: done" +exit $errcode diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7e632b465ab4..f69d2ee29742 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2611,6 +2611,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) { long ret, sib; void *status; + struct timespec delay = { .tv_nsec = 100000000 }; ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); @@ -2664,7 +2665,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0)) - sleep(0.1); + nanosleep(&delay, NULL); /* Switch to the remaining sibling */ sib = !sib; @@ -2689,7 +2690,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) EXPECT_EQ(0, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0)) - sleep(0.1); + nanosleep(&delay, NULL); ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); @@ -2971,6 +2972,12 @@ TEST(get_metadata) struct seccomp_metadata md; long ret; + /* Only real root can get metadata. */ + if (geteuid()) { + XFAIL(return, "get_metadata requires real root"); + return; + } + ASSERT_EQ(0, pipe(pipefd)); pid = fork(); @@ -2985,11 +2992,11 @@ TEST(get_metadata) }; /* one with log, one without */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, + EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, &prog)); - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); + EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); - ASSERT_EQ(0, close(pipefd[0])); + EXPECT_EQ(0, close(pipefd[0])); ASSERT_EQ(1, write(pipefd[1], "1", 1)); ASSERT_EQ(0, close(pipefd[1])); @@ -3062,6 +3069,11 @@ TEST(user_notification_basic) .filter = filter, }; + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + pid = fork(); ASSERT_GE(pid, 0); @@ -3077,7 +3089,7 @@ TEST(user_notification_basic) EXPECT_EQ(true, WIFEXITED(status)); EXPECT_EQ(0, WEXITSTATUS(status)); - /* Add some no-op filters so for grins. */ + /* Add some no-op filters for grins. */ EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); @@ -3143,6 +3155,11 @@ TEST(user_notification_kill_in_middle) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); @@ -3190,6 +3207,11 @@ TEST(user_notification_signal) struct seccomp_notif_resp resp = {}; char c; + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); listener = user_trap_syscall(__NR_gettid, @@ -3255,6 +3277,11 @@ TEST(user_notification_closed_listener) long ret; int status, listener; + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); @@ -3287,7 +3314,7 @@ TEST(user_notification_child_pid_ns) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; - ASSERT_EQ(unshare(CLONE_NEWPID), 0); + ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0); listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); @@ -3324,6 +3351,10 @@ TEST(user_notification_sibling_pid_ns) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; + ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); @@ -3386,6 +3417,8 @@ TEST(user_notification_fault_recv) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; + ASSERT_EQ(unshare(CLONE_NEWUSER), 0); + listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); diff --git a/tools/testing/selftests/tmpfs/.gitignore b/tools/testing/selftests/tmpfs/.gitignore new file mode 100644 index 000000000000..a96838fad74d --- /dev/null +++ b/tools/testing/selftests/tmpfs/.gitignore @@ -0,0 +1 @@ +/bug-link-o-tmpfile diff --git a/tools/testing/selftests/tmpfs/Makefile b/tools/testing/selftests/tmpfs/Makefile new file mode 100644 index 000000000000..953c81299181 --- /dev/null +++ b/tools/testing/selftests/tmpfs/Makefile @@ -0,0 +1,7 @@ +CFLAGS += -Wall -O2 +CFLAGS += -D_GNU_SOURCE + +TEST_GEN_PROGS := +TEST_GEN_PROGS += bug-link-o-tmpfile + +include ../lib.mk diff --git a/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c new file mode 100644 index 000000000000..b5c3ddb90942 --- /dev/null +++ b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that open(O_TMPFILE), linkat() doesn't screw accounting. */ +#include <errno.h> +#include <sched.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/mount.h> +#include <unistd.h> + +int main(void) +{ + int fd; + + if (unshare(CLONE_NEWNS) == -1) { + if (errno == ENOSYS || errno == EPERM) { + fprintf(stderr, "error: unshare, errno %d\n", errno); + return 4; + } + fprintf(stderr, "error: unshare, errno %d\n", errno); + return 1; + } + if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { + fprintf(stderr, "error: mount '/', errno %d\n", errno); + return 1; + } + + /* Our heroes: 1 root inode, 1 O_TMPFILE inode, 1 permanent inode. */ + if (mount(NULL, "/tmp", "tmpfs", 0, "nr_inodes=3") == -1) { + fprintf(stderr, "error: mount tmpfs, errno %d\n", errno); + return 1; + } + + fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_TMPFILE, 0600); + if (fd == -1) { + fprintf(stderr, "error: open 1, errno %d\n", errno); + return 1; + } + if (linkat(fd, "", AT_FDCWD, "/tmp/1", AT_EMPTY_PATH) == -1) { + fprintf(stderr, "error: linkat, errno %d\n", errno); + return 1; + } + close(fd); + + fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_TMPFILE, 0600); + if (fd == -1) { + fprintf(stderr, "error: open 2, errno %d\n", errno); + return 1; + } + + return 0; +} diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 9b777fa95f09..5a2d7b8efc40 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -23,6 +23,14 @@ #define MAP_HUGETLB 0x40000 /* arch specific */ #endif +#ifndef MAP_HUGE_SHIFT +#define MAP_HUGE_SHIFT 26 +#endif + +#ifndef MAP_HUGE_MASK +#define MAP_HUGE_MASK 0x3f +#endif + /* Only ia64 requires this */ #ifdef __ia64__ #define ADDR (void *)(0x8000000000000000UL) @@ -58,12 +66,29 @@ static int read_bytes(char *addr) return 0; } -int main(void) +int main(int argc, char **argv) { void *addr; int ret; + size_t length = LENGTH; + int flags = FLAGS; + int shift = 0; + + if (argc > 1) + length = atol(argv[1]) << 20; + if (argc > 2) { + shift = atoi(argv[2]); + if (shift) + flags |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT; + } + + if (shift) + printf("%u kB hugepages\n", 1 << shift); + else + printf("Default size hugepages\n"); + printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20); - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, -1, 0); + addr = mmap(ADDR, length, PROTECTION, flags, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); exit(1); diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 584a91ae4a8f..951c507a27f7 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -211,4 +211,20 @@ else echo "[PASS]" fi +echo "------------------------------------" +echo "running vmalloc stability smoke test" +echo "------------------------------------" +./test_vmalloc.sh smoke +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode diff --git a/tools/testing/selftests/vm/test_vmalloc.sh b/tools/testing/selftests/vm/test_vmalloc.sh new file mode 100644 index 000000000000..06d2bb109f06 --- /dev/null +++ b/tools/testing/selftests/vm/test_vmalloc.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> +# +# This is a test script for the kernel test driver to analyse vmalloc +# allocator. Therefore it is just a kernel module loader. You can specify +# and pass different parameters in order to: +# a) analyse performance of vmalloc allocations; +# b) stressing and stability check of vmalloc subsystem. + +TEST_NAME="vmalloc" +DRIVER="test_${TEST_NAME}" + +# 1 if fails +exitcode=1 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# +# Static templates for performance, stressing and smoke tests. +# Also it is possible to pass any supported parameters manualy. +# +PERF_PARAM="single_cpu_test=1 sequential_test_order=1 test_repeat_count=3" +SMOKE_PARAM="single_cpu_test=1 test_loop_count=10000 test_repeat_count=10" +STRESS_PARAM="test_repeat_count=20" + +check_test_requirements() +{ + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo "$0: Must be run as root" + exit $ksft_skip + fi + + if ! which modprobe > /dev/null 2>&1; then + echo "$0: You need modprobe installed" + exit $ksft_skip + fi + + if ! modinfo $DRIVER > /dev/null 2>&1; then + echo "$0: You must have the following enabled in your kernel:" + echo "CONFIG_TEST_VMALLOC=m" + exit $ksft_skip + fi +} + +run_perfformance_check() +{ + echo "Run performance tests to evaluate how fast vmalloc allocation is." + echo "It runs all test cases on one single CPU with sequential order." + + modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1 + echo "Done." + echo "Ccheck the kernel message buffer to see the summary." +} + +run_stability_check() +{ + echo "Run stability tests. In order to stress vmalloc subsystem we run" + echo "all available test cases on all available CPUs simultaneously." + echo "It will take time, so be patient." + + modprobe $DRIVER $STRESS_PARAM > /dev/null 2>&1 + echo "Done." + echo "Check the kernel ring buffer to see the summary." +} + +run_smoke_check() +{ + echo "Run smoke test. Note, this test provides basic coverage." + echo "Please check $0 output how it can be used" + echo "for deep performance analysis as well as stress testing." + + modprobe $DRIVER $SMOKE_PARAM > /dev/null 2>&1 + echo "Done." + echo "Check the kernel ring buffer to see the summary." +} + +usage() +{ + echo -n "Usage: $0 [ performance ] | [ stress ] | | [ smoke ] | " + echo "manual parameters" + echo + echo "Valid tests and parameters:" + echo + modinfo $DRIVER + echo + echo "Example usage:" + echo + echo "# Shows help message" + echo "./${DRIVER}.sh" + echo + echo "# Runs 1 test(id_1), repeats it 5 times on all online CPUs" + echo "./${DRIVER}.sh run_test_mask=1 test_repeat_count=5" + echo + echo -n "# Runs 4 tests(id_1|id_2|id_4|id_16) on one CPU with " + echo "sequential order" + echo -n "./${DRIVER}.sh single_cpu_test=1 sequential_test_order=1 " + echo "run_test_mask=23" + echo + echo -n "# Runs all tests on all online CPUs, shuffled order, repeats " + echo "20 times" + echo "./${DRIVER}.sh test_repeat_count=20" + echo + echo "# Performance analysis" + echo "./${DRIVER}.sh performance" + echo + echo "# Stress testing" + echo "./${DRIVER}.sh stress" + echo + exit 0 +} + +function validate_passed_args() +{ + VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'` + + # + # Something has been passed, check it. + # + for passed_arg in $@; do + key=${passed_arg//=*/} + val="${passed_arg:$((${#key}+1))}" + valid=0 + + for valid_arg in $VALID_ARGS; do + if [[ $key = $valid_arg ]] && [[ $val -gt 0 ]]; then + valid=1 + break + fi + done + + if [[ $valid -ne 1 ]]; then + echo "Error: key or value is not correct: ${key} $val" + exit $exitcode + fi + done +} + +function run_manual_check() +{ + # + # Validate passed parameters. If there is wrong one, + # the script exists and does not execute further. + # + validate_passed_args $@ + + echo "Run the test with following parameters: $@" + modprobe $DRIVER $@ > /dev/null 2>&1 + echo "Done." + echo "Check the kernel ring buffer to see the summary." +} + +function run_test() +{ + if [ $# -eq 0 ]; then + usage + else + if [[ "$1" = "performance" ]]; then + run_perfformance_check + elif [[ "$1" = "stress" ]]; then + run_stability_check + elif [[ "$1" = "smoke" ]]; then + run_smoke_check + else + run_manual_check $@ + fi + fi +} + +check_test_requirements +run_test $@ + +exit 0 diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 1ff3a6c0367b..6f64b2b93234 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -133,7 +133,7 @@ static const char * const page_flag_names[] = { [KPF_NOPAGE] = "n:nopage", [KPF_KSM] = "x:ksm", [KPF_THP] = "t:thp", - [KPF_BALLOON] = "o:balloon", + [KPF_OFFLINE] = "o:offline", [KPF_PGTABLE] = "g:pgtable", [KPF_ZERO_PAGE] = "z:zero_page", [KPF_IDLE] = "i:idle_page", diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 334b16db0ebb..73818f1b2ef8 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -110,39 +110,42 @@ static void fatal(const char *x, ...) static void usage(void) { printf("slabinfo 4/15/2011. (c) 2007 sgi/(c) 2011 Linux Foundation.\n\n" - "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" + "slabinfo [-aADefhilnosrStTvz1LXBU] [N=K] [-dafzput] [slab-regexp]\n" "-a|--aliases Show aliases\n" "-A|--activity Most active slabs first\n" - "-d<options>|--debug=<options> Set/Clear Debug options\n" + "-B|--Bytes Show size in bytes\n" "-D|--display-active Switch line format to activity\n" "-e|--empty Show empty slabs\n" "-f|--first-alias Show first alias\n" "-h|--help Show usage information\n" "-i|--inverted Inverted list\n" "-l|--slabs Show slabs\n" + "-L|--Loss Sort by loss\n" "-n|--numa Show NUMA information\n" - "-o|--ops Show kmem_cache_ops\n" + "-N|--lines=K Show the first K slabs\n" + "-o|--ops Show kmem_cache_ops\n" + "-r|--report Detailed report on single slabs\n" "-s|--shrink Shrink slabs\n" - "-r|--report Detailed report on single slabs\n" "-S|--Size Sort by size\n" "-t|--tracking Show alloc/free information\n" "-T|--Totals Show summary information\n" + "-U|--Unreclaim Show unreclaimable slabs only\n" "-v|--validate Validate slabs\n" "-z|--zero Include empty slabs\n" "-1|--1ref Single reference\n" - "-N|--lines=K Show the first K slabs\n" - "-L|--Loss Sort by loss\n" "-X|--Xtotals Show extended summary information\n" - "-B|--Bytes Show size in bytes\n" - "-U|--Unreclaim Show unreclaimable slabs only\n" - "\nValid debug options (FZPUT may be combined)\n" - "a / A Switch on all debug options (=FZUP)\n" - "- Switch off all debug options\n" - "f / F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" - "z / Z Redzoning\n" - "p / P Poisoning\n" - "u / U Tracking\n" - "t / T Tracing\n" + + "\n" + "-d | --debug Switch off all debug options\n" + "-da | --debug=a Switch on all debug options (--debug=FZPU)\n" + + "\n" + "-d[afzput] | --debug=[afzput]\n" + " f | F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" + " z | Z Redzoning\n" + " p | P Poisoning\n" + " u | U Tracking\n" + " t | T Tracing\n" ); } |