diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2022-12-16 13:53:53 +0100 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2022-12-16 13:53:53 +0100 |
commit | 1a931707ad4a46e79d4ecfee56d8f6e8cc8d4f28 (patch) | |
tree | c3ed4dafca580360da63eef576b35eb67eb2e5a2 /tools | |
parent | perf tools: Use "grep -E" instead of "egrep" (diff) | |
parent | Merge tag 'exfat-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/gi... (diff) | |
download | linux-1a931707ad4a46e79d4ecfee56d8f6e8cc8d4f28.tar.xz linux-1a931707ad4a46e79d4ecfee56d8f6e8cc8d4f28.zip |
Merge remote-tracking branch 'torvalds/master' into perf/core
To resolve a trivial merge conflict with c302378bc157f6a7 ("libbpf:
Hashmap interface update to allow both long and void* keys/values"),
where a function present upstream was removed in the perf tools
development tree.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
504 files changed, 36419 insertions, 5547 deletions
diff --git a/tools/accounting/procacct.c b/tools/accounting/procacct.c index 8353d3237e50..90c4a37f53d9 100644 --- a/tools/accounting/procacct.c +++ b/tools/accounting/procacct.c @@ -261,25 +261,20 @@ void handle_aggr(int mother, struct nlattr *na, int fd) int main(int argc, char *argv[]) { - int c, rc, rep_len, aggr_len, len2; - int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC; + int c, rc, rep_len; __u16 id; __u32 mypid; struct nlattr *na; int nl_sd = -1; int len = 0; - pid_t tid = 0; int fd = 0; int write_file = 0; int maskset = 0; char *logfile = NULL; - int containerset = 0; - char *containerpath = NULL; int cfd = 0; int forking = 0; - sigset_t sigset; struct msgtemplate msg; diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 1f5e26aae9fc..365cf182df12 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -8,6 +8,7 @@ #define LOCK_PREFIX "\n\tlock; " +#include <asm/asm.h> #include <asm/cmpxchg.h> /* @@ -70,4 +71,14 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) return cmpxchg(&v->counter, old, new); } +static inline int test_and_set_bit(long nr, unsigned long *addr) +{ + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c"); +} + +static inline int test_and_clear_bit(long nr, unsigned long *addr) +{ + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, "Ir", nr, "%0", "c"); +} + #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 46de10a809ec..649e50a8f9dd 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -53,14 +53,6 @@ /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ diff --git a/tools/arch/x86/intel_sdsi/intel_sdsi.c b/tools/arch/x86/intel_sdsi/intel_sdsi.c index c0e2f2349db4..2cd92761f171 100644 --- a/tools/arch/x86/intel_sdsi/intel_sdsi.c +++ b/tools/arch/x86/intel_sdsi/intel_sdsi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * sdsi: Intel Software Defined Silicon tool for provisioning certificates - * and activation payloads on supported cpus. + * sdsi: Intel On Demand (formerly Software Defined Silicon) tool for + * provisioning certificates and activation payloads on supported cpus. * * See https://github.com/intel/intel-sdsi/blob/master/os-interface.rst * for register descriptions. @@ -22,19 +22,54 @@ #include <sys/types.h> +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + #define SDSI_DEV "intel_vsec.sdsi" #define AUX_DEV_PATH "/sys/bus/auxiliary/devices/" #define SDSI_PATH (AUX_DEV_DIR SDSI_DEV) -#define GUID 0x6dd191 -#define REGISTERS_MIN_SIZE 72 +#define GUID_V1 0x6dd191 +#define REGS_SIZE_GUID_V1 72 +#define GUID_V2 0xF210D9EF +#define REGS_SIZE_GUID_V2 80 +#define STATE_CERT_MAX_SIZE 4096 +#define METER_CERT_MAX_SIZE 4096 +#define STATE_MAX_NUM_LICENSES 16 +#define STATE_MAX_NUM_IN_BUNDLE (uint32_t)8 +#define METER_MAX_NUM_BUNDLES 8 #define __round_mask(x, y) ((__typeof__(x))((y) - 1)) #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +struct nvram_content_auth_err_sts { + uint64_t reserved:3; + uint64_t sdsi_content_auth_err:1; + uint64_t reserved1:1; + uint64_t sdsi_metering_auth_err:1; + uint64_t reserved2:58; +}; + struct enabled_features { uint64_t reserved:3; uint64_t sdsi:1; - uint64_t reserved1:60; + uint64_t reserved1:8; + uint64_t attestation:1; + uint64_t reserved2:13; + uint64_t metering:1; + uint64_t reserved3:37; +}; + +struct key_provision_status { + uint64_t reserved:1; + uint64_t license_key_provisioned:1; + uint64_t reserved2:62; }; struct auth_fail_count { @@ -49,31 +84,102 @@ struct availability { uint64_t reserved:48; uint64_t available:3; uint64_t threshold:3; + uint64_t reserved2:10; +}; + +struct nvram_update_limit { + uint64_t reserved:12; + uint64_t sdsi_50_pct:1; + uint64_t sdsi_75_pct:1; + uint64_t sdsi_90_pct:1; + uint64_t reserved2:49; }; struct sdsi_regs { uint64_t ppin; - uint64_t reserved; + struct nvram_content_auth_err_sts auth_err_sts; struct enabled_features en_features; - uint64_t reserved1; + struct key_provision_status key_prov_sts; struct auth_fail_count auth_fail_count; struct availability prov_avail; - uint64_t reserved2; - uint64_t reserved3; - uint64_t socket_id; + struct nvram_update_limit limits; + uint64_t pcu_cr3_capid_cfg; + union { + struct { + uint64_t socket_id; + } v1; + struct { + uint64_t reserved; + uint64_t socket_id; + uint64_t reserved2; + } v2; + } extra; +}; +#define CONTENT_TYPE_LK_ENC 0xD +#define CONTENT_TYPE_LK_BLOB_ENC 0xE + +struct state_certificate { + uint32_t content_type; + uint32_t region_rev_id; + uint32_t header_size; + uint32_t total_size; + uint32_t key_size; + uint32_t num_licenses; +}; + +struct license_key_info { + uint32_t key_rev_id; + uint64_t key_image_content[6]; +} __packed; + +#define LICENSE_BLOB_SIZE(l) (((l) & 0x7fffffff) * 4) +#define LICENSE_VALID(l) (!!((l) & 0x80000000)) + +// License Group Types +#define LBT_ONE_TIME_UPGRADE 1 +#define LBT_METERED_UPGRADE 2 + +struct license_blob_content { + uint32_t type; + uint64_t id; + uint64_t ppin; + uint64_t previous_ppin; + uint32_t rev_id; + uint32_t num_bundles; +} __packed; + +struct bundle_encoding { + uint32_t encoding; + uint32_t encoding_rsvd[7]; +}; + +struct meter_certificate { + uint32_t block_signature; + uint32_t counter_unit; + uint64_t ppin; + uint32_t bundle_length; + uint32_t reserved; + uint32_t mmrc_encoding; + uint32_t mmrc_counter; +}; + +struct bundle_encoding_counter { + uint32_t encoding; + uint32_t counter; }; struct sdsi_dev { struct sdsi_regs regs; + struct state_certificate sc; char *dev_name; char *dev_path; - int guid; + uint32_t guid; }; enum command { - CMD_NONE, CMD_SOCKET_INFO, - CMD_DUMP_CERT, + CMD_METER_CERT, + CMD_STATE_CERT, CMD_PROV_AKC, CMD_PROV_CAP, }; @@ -98,7 +204,7 @@ static void sdsi_list_devices(void) } if (!found) - fprintf(stderr, "No sdsi devices found.\n"); + fprintf(stderr, "No On Demand devices found.\n"); } static int sdsi_update_registers(struct sdsi_dev *s) @@ -121,7 +227,7 @@ static int sdsi_update_registers(struct sdsi_dev *s) return -1; } - if (s->guid != GUID) { + if (s->guid != GUID_V1 && s->guid != GUID_V2) { fprintf(stderr, "Unrecognized guid, 0x%x\n", s->guid); fclose(regs_ptr); return -1; @@ -129,7 +235,8 @@ static int sdsi_update_registers(struct sdsi_dev *s) /* Update register info for this guid */ ret = fread(&s->regs, sizeof(uint8_t), sizeof(s->regs), regs_ptr); - if (ret != sizeof(s->regs)) { + if ((s->guid == GUID_V1 && ret != REGS_SIZE_GUID_V1) || + (s->guid == GUID_V2 && ret != REGS_SIZE_GUID_V2)) { fprintf(stderr, "Could not read 'registers' file\n"); fclose(regs_ptr); return -1; @@ -153,8 +260,18 @@ static int sdsi_read_reg(struct sdsi_dev *s) printf("Socket information for device %s\n", s->dev_name); printf("\n"); printf("PPIN: 0x%lx\n", s->regs.ppin); + printf("NVRAM Content Authorization Error Status\n"); + printf(" SDSi Auth Err Sts: %s\n", !!s->regs.auth_err_sts.sdsi_content_auth_err ? "Error" : "Okay"); + + if (!!s->regs.en_features.metering) + printf(" Metering Auth Err Sts: %s\n", !!s->regs.auth_err_sts.sdsi_metering_auth_err ? "Error" : "Okay"); + printf("Enabled Features\n"); - printf(" SDSi: %s\n", !!s->regs.en_features.sdsi ? "Enabled" : "Disabled"); + printf(" On Demand: %s\n", !!s->regs.en_features.sdsi ? "Enabled" : "Disabled"); + printf(" Attestation: %s\n", !!s->regs.en_features.attestation ? "Enabled" : "Disabled"); + printf(" On Demand: %s\n", !!s->regs.en_features.sdsi ? "Enabled" : "Disabled"); + printf(" Metering: %s\n", !!s->regs.en_features.metering ? "Enabled" : "Disabled"); + printf("License Key (AKC) Provisioned: %s\n", !!s->regs.key_prov_sts.license_key_provisioned ? "Yes" : "No"); printf("Authorization Failure Count\n"); printf(" AKC Failure Count: %d\n", s->regs.auth_fail_count.key_failure_count); printf(" AKC Failure Threshold: %d\n", s->regs.auth_fail_count.key_failure_threshold); @@ -163,25 +280,148 @@ static int sdsi_read_reg(struct sdsi_dev *s) printf("Provisioning Availability\n"); printf(" Updates Available: %d\n", s->regs.prov_avail.available); printf(" Updates Threshold: %d\n", s->regs.prov_avail.threshold); - printf("Socket ID: %ld\n", s->regs.socket_id & 0xF); + printf("NVRAM Udate Limit\n"); + printf(" 50%% Limit Reached: %s\n", !!s->regs.limits.sdsi_50_pct ? "Yes" : "No"); + printf(" 75%% Limit Reached: %s\n", !!s->regs.limits.sdsi_75_pct ? "Yes" : "No"); + printf(" 90%% Limit Reached: %s\n", !!s->regs.limits.sdsi_90_pct ? "Yes" : "No"); + if (s->guid == GUID_V1) + printf("Socket ID: %ld\n", s->regs.extra.v1.socket_id & 0xF); + else + printf("Socket ID: %ld\n", s->regs.extra.v2.socket_id & 0xF); return 0; } -static int sdsi_certificate_dump(struct sdsi_dev *s) +static char *license_blob_type(uint32_t type) +{ + switch (type) { + case LBT_ONE_TIME_UPGRADE: + return "One time upgrade"; + case LBT_METERED_UPGRADE: + return "Metered upgrade"; + default: + return "Unknown license blob type"; + } +} + +static char *content_type(uint32_t type) +{ + switch (type) { + case CONTENT_TYPE_LK_ENC: + return "Licencse key encoding"; + case CONTENT_TYPE_LK_BLOB_ENC: + return "License key + Blob encoding"; + default: + return "Unknown content type"; + } +} + +static void get_feature(uint32_t encoding, char *feature) +{ + char *name = (char *)&encoding; + + feature[3] = name[0]; + feature[2] = name[1]; + feature[1] = name[2]; + feature[0] = name[3]; +} + +static int sdsi_meter_cert_show(struct sdsi_dev *s) { - uint64_t state_certificate[512] = {0}; - bool first_instance; - uint64_t previous; + char buf[METER_CERT_MAX_SIZE] = {0}; + struct bundle_encoding_counter *bec; + struct meter_certificate *mc; + uint32_t count = 0; FILE *cert_ptr; - int i, ret, size; + int ret, size; ret = sdsi_update_registers(s); if (ret) return ret; if (!s->regs.en_features.sdsi) { - fprintf(stderr, "SDSi feature is present but not enabled."); + fprintf(stderr, "SDSi feature is present but not enabled.\n"); + fprintf(stderr, " Unable to read meter certificate\n"); + return -1; + } + + if (!s->regs.en_features.metering) { + fprintf(stderr, "Metering not supporting on this socket.\n"); + return -1; + } + + ret = chdir(s->dev_path); + if (ret == -1) { + perror("chdir"); + return ret; + } + + cert_ptr = fopen("meter_certificate", "r"); + if (!cert_ptr) { + perror("Could not open 'meter_certificate' file"); + return -1; + } + + size = fread(buf, 1, sizeof(buf), cert_ptr); + if (!size) { + fprintf(stderr, "Could not read 'meter_certificate' file\n"); + fclose(cert_ptr); + return -1; + } + fclose(cert_ptr); + + mc = (struct meter_certificate *)buf; + + printf("\n"); + printf("Meter certificate for device %s\n", s->dev_name); + printf("\n"); + printf("Block Signature: 0x%x\n", mc->block_signature); + printf("Count Unit: %dms\n", mc->counter_unit); + printf("PPIN: 0x%lx\n", mc->ppin); + printf("Feature Bundle Length: %d\n", mc->bundle_length); + printf("MMRC encoding: %d\n", mc->mmrc_encoding); + printf("MMRC counter: %d\n", mc->mmrc_counter); + if (mc->bundle_length % 8) { + fprintf(stderr, "Invalid bundle length\n"); + return -1; + } + + if (mc->bundle_length > METER_MAX_NUM_BUNDLES * 8) { + fprintf(stderr, "More than %d bundles: %d\n", + METER_MAX_NUM_BUNDLES, mc->bundle_length / 8); + return -1; + } + + bec = (void *)(mc) + sizeof(mc); + + printf("Number of Feature Counters: %d\n", mc->bundle_length / 8); + while (count++ < mc->bundle_length / 8) { + char feature[5]; + + feature[4] = '\0'; + get_feature(bec[count].encoding, feature); + printf(" %s: %d\n", feature, bec[count].counter); + } + + return 0; +} + +static int sdsi_state_cert_show(struct sdsi_dev *s) +{ + char buf[STATE_CERT_MAX_SIZE] = {0}; + struct state_certificate *sc; + struct license_key_info *lki; + uint32_t offset = 0; + uint32_t count = 0; + FILE *cert_ptr; + int ret, size; + + ret = sdsi_update_registers(s); + if (ret) + return ret; + + if (!s->regs.en_features.sdsi) { + fprintf(stderr, "On Demand feature is present but not enabled."); fprintf(stderr, " Unable to read state certificate"); return -1; } @@ -198,32 +438,74 @@ static int sdsi_certificate_dump(struct sdsi_dev *s) return -1; } - size = fread(state_certificate, 1, sizeof(state_certificate), cert_ptr); + size = fread(buf, 1, sizeof(buf), cert_ptr); if (!size) { fprintf(stderr, "Could not read 'state_certificate' file\n"); fclose(cert_ptr); return -1; } + fclose(cert_ptr); - printf("%3d: 0x%lx\n", 0, state_certificate[0]); - previous = state_certificate[0]; - first_instance = true; + sc = (struct state_certificate *)buf; - for (i = 1; i < (int)(round_up(size, sizeof(uint64_t))/sizeof(uint64_t)); i++) { - if (state_certificate[i] == previous) { - if (first_instance) { - puts("*"); - first_instance = false; - } - continue; + /* Print register info for this guid */ + printf("\n"); + printf("State certificate for device %s\n", s->dev_name); + printf("\n"); + printf("Content Type: %s\n", content_type(sc->content_type)); + printf("Region Revision ID: %d\n", sc->region_rev_id); + printf("Header Size: %d\n", sc->header_size * 4); + printf("Total Size: %d\n", sc->total_size); + printf("OEM Key Size: %d\n", sc->key_size * 4); + printf("Number of Licenses: %d\n", sc->num_licenses); + + /* Skip over the license sizes 4 bytes per license) to get the license key info */ + lki = (void *)sc + sizeof(*sc) + (4 * sc->num_licenses); + + printf("License blob Info:\n"); + printf(" License Key Revision ID: 0x%x\n", lki->key_rev_id); + printf(" License Key Image Content: 0x%lx%lx%lx%lx%lx%lx\n", + lki->key_image_content[5], lki->key_image_content[4], + lki->key_image_content[3], lki->key_image_content[2], + lki->key_image_content[1], lki->key_image_content[0]); + + while (count++ < sc->num_licenses) { + uint32_t blob_size_field = *(uint32_t *)(buf + 0x14 + count * 4); + uint32_t blob_size = LICENSE_BLOB_SIZE(blob_size_field); + bool license_valid = LICENSE_VALID(blob_size_field); + struct license_blob_content *lbc = + (void *)(sc) + // start of the state certificate + sizeof(*sc) + // size of the state certificate + (4 * sc->num_licenses) + // total size of the blob size blocks + sizeof(*lki) + // size of the license key info + offset; // offset to this blob content + struct bundle_encoding *bundle = (void *)(lbc) + sizeof(*lbc); + char feature[5]; + uint32_t i; + + printf(" Blob %d:\n", count - 1); + printf(" License blob size: %u\n", blob_size); + printf(" License is valid: %s\n", license_valid ? "Yes" : "No"); + printf(" License blob type: %s\n", license_blob_type(lbc->type)); + printf(" License blob ID: 0x%lx\n", lbc->id); + printf(" PPIN: 0x%lx\n", lbc->ppin); + printf(" Previous PPIN: 0x%lx\n", lbc->previous_ppin); + printf(" Blob revision ID: %u\n", lbc->rev_id); + printf(" Number of Features: %u\n", lbc->num_bundles); + + feature[4] = '\0'; + + for (i = 0; i < min(lbc->num_bundles, STATE_MAX_NUM_IN_BUNDLE); i++) { + get_feature(bundle[i].encoding, feature); + printf(" Feature %d: %s\n", i, feature); } - printf("%3d: 0x%lx\n", i, state_certificate[i]); - previous = state_certificate[i]; - first_instance = true; - } - printf("%3d\n", i); - fclose(cert_ptr); + if (lbc->num_bundles > STATE_MAX_NUM_IN_BUNDLE) + fprintf(stderr, " Warning: %d > %d licenses in bundle reported.\n", + lbc->num_bundles, STATE_MAX_NUM_IN_BUNDLE); + + offset += blob_size; + }; return 0; } @@ -231,7 +513,7 @@ static int sdsi_certificate_dump(struct sdsi_dev *s) static int sdsi_provision(struct sdsi_dev *s, char *bin_file, enum command command) { int bin_fd, prov_fd, size, ret; - char buf[4096] = { 0 }; + char buf[STATE_CERT_MAX_SIZE] = { 0 }; char cap[] = "provision_cap"; char akc[] = "provision_akc"; char *prov_file; @@ -266,7 +548,7 @@ static int sdsi_provision(struct sdsi_dev *s, char *bin_file, enum command comma } /* Read the binary file into the buffer */ - size = read(bin_fd, buf, 4096); + size = read(bin_fd, buf, STATE_CERT_MAX_SIZE); if (size == -1) { close(bin_fd); close(prov_fd); @@ -298,7 +580,7 @@ static int sdsi_provision_akc(struct sdsi_dev *s, char *bin_file) return ret; if (!s->regs.en_features.sdsi) { - fprintf(stderr, "SDSi feature is present but not enabled. Unable to provision"); + fprintf(stderr, "On Demand feature is present but not enabled. Unable to provision"); return -1; } @@ -328,7 +610,7 @@ static int sdsi_provision_cap(struct sdsi_dev *s, char *bin_file) return ret; if (!s->regs.en_features.sdsi) { - fprintf(stderr, "SDSi feature is present but not enabled. Unable to provision"); + fprintf(stderr, "On Demand feature is present but not enabled. Unable to provision"); return -1; } @@ -443,25 +725,27 @@ static void sdsi_free_dev(struct sdsi_dev *s) static void usage(char *prog) { - printf("Usage: %s [-l] [-d DEVNO [-iD] [-a FILE] [-c FILE]]\n", prog); + printf("Usage: %s [-l] [-d DEVNO [-i] [-s] [-m] [-a FILE] [-c FILE]]\n", prog); } static void show_help(void) { printf("Commands:\n"); - printf(" %-18s\t%s\n", "-l, --list", "list available sdsi devices"); - printf(" %-18s\t%s\n", "-d, --devno DEVNO", "sdsi device number"); - printf(" %-18s\t%s\n", "-i --info", "show socket information"); - printf(" %-18s\t%s\n", "-D --dump", "dump state certificate data"); - printf(" %-18s\t%s\n", "-a --akc FILE", "provision socket with AKC FILE"); - printf(" %-18s\t%s\n", "-c --cap FILE>", "provision socket with CAP FILE"); + printf(" %-18s\t%s\n", "-l, --list", "list available On Demand devices"); + printf(" %-18s\t%s\n", "-d, --devno DEVNO", "On Demand device number"); + printf(" %-18s\t%s\n", "-i, --info", "show socket information"); + printf(" %-18s\t%s\n", "-s, --state", "show state certificate"); + printf(" %-18s\t%s\n", "-m, --meter", "show meter certificate"); + printf(" %-18s\t%s\n", "-a, --akc FILE", "provision socket with AKC FILE"); + printf(" %-18s\t%s\n", "-c, --cap FILE>", "provision socket with CAP FILE"); } int main(int argc, char *argv[]) { char bin_file[PATH_MAX], *dev_no = NULL; + bool device_selected = false; char *progname; - enum command command = CMD_NONE; + enum command command = -1; struct sdsi_dev *s; int ret = 0, opt; int option_index = 0; @@ -470,21 +754,23 @@ int main(int argc, char *argv[]) {"akc", required_argument, 0, 'a'}, {"cap", required_argument, 0, 'c'}, {"devno", required_argument, 0, 'd'}, - {"dump", no_argument, 0, 'D'}, {"help", no_argument, 0, 'h'}, {"info", no_argument, 0, 'i'}, {"list", no_argument, 0, 'l'}, + {"meter", no_argument, 0, 'm'}, + {"state", no_argument, 0, 's'}, {0, 0, 0, 0 } }; progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+a:c:d:Da:c:h", long_options, + while ((opt = getopt_long_only(argc, argv, "+a:c:d:hilms", long_options, &option_index)) != -1) { switch (opt) { case 'd': dev_no = optarg; + device_selected = true; break; case 'l': sdsi_list_devices(); @@ -492,8 +778,11 @@ int main(int argc, char *argv[]) case 'i': command = CMD_SOCKET_INFO; break; - case 'D': - command = CMD_DUMP_CERT; + case 'm': + command = CMD_METER_CERT; + break; + case 's': + command = CMD_STATE_CERT; break; case 'a': case 'c': @@ -520,39 +809,38 @@ int main(int argc, char *argv[]) } } - if (!dev_no) { - if (command != CMD_NONE) - fprintf(stderr, "Missing device number, DEVNO, for this command\n"); - usage(progname); - return -1; - } + if (device_selected) { + s = sdsi_create_dev(dev_no); + if (!s) + return -1; - s = sdsi_create_dev(dev_no); - if (!s) - return -1; + switch (command) { + case CMD_SOCKET_INFO: + ret = sdsi_read_reg(s); + break; + case CMD_METER_CERT: + ret = sdsi_meter_cert_show(s); + break; + case CMD_STATE_CERT: + ret = sdsi_state_cert_show(s); + break; + case CMD_PROV_AKC: + ret = sdsi_provision_akc(s, bin_file); + break; + case CMD_PROV_CAP: + ret = sdsi_provision_cap(s, bin_file); + break; + default: + fprintf(stderr, "No command specified\n"); + return -1; + } - /* Run the command */ - switch (command) { - case CMD_NONE: - fprintf(stderr, "Missing command for device %s\n", dev_no); - usage(progname); - break; - case CMD_SOCKET_INFO: - ret = sdsi_read_reg(s); - break; - case CMD_DUMP_CERT: - ret = sdsi_certificate_dump(s); - break; - case CMD_PROV_AKC: - ret = sdsi_provision_akc(s, bin_file); - break; - case CMD_PROV_CAP: - ret = sdsi_provision_cap(s, bin_file); - break; - } - - - sdsi_free_dev(s); + sdsi_free_dev(s); + + } else { + fprintf(stderr, "No device specified\n"); + return -1; + } return ret; } diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 7f3b67a8b48f..11250c4734fe 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -55,7 +55,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** | **user_ringbuf** } +| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index eb1b2a254eb1..14de72544995 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -31,7 +31,7 @@ PROG COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] +| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** @@ -131,7 +131,7 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog loadall** pins all programs @@ -150,6 +150,17 @@ DESCRIPTION Optional **pinmaps** argument can be provided to pin all maps under *MAP_DIR* directory. + If **autoattach** is specified program will be attached + before pin. In that case, only the link (representing the + program attached to its hook) is pinned, not the program as + such, so the path won't show in **bpftool prog show -f**, + only show in **bpftool link show -f**. Also, this only works + when bpftool (libbpf) is able to infer all necessary + information from the object file, in particular, it's not + supported for all program types. If a program does not + support autoattach, bpftool falls back to regular pinning + for that program instead. + Note: *PATH* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future extensions of *bpffs*. diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 4107a586b68b..30df7a707f02 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -7,10 +7,10 @@ Print bpftool's version number (similar to **bpftool version**), the number of the libbpf version in use, and optional features that were included when bpftool was compiled. Optional features include linking - against libbfd to provide the disassembler for JIT-ted programs - (**bpftool prog dump jited**) and usage of BPF skeletons (some - features like **bpftool prog profile** or showing pids associated to - BPF objects may rely on it). + against LLVM or libbfd to provide the disassembler for JIT-ted + programs (**bpftool prog dump jited**) and usage of BPF skeletons + (some features like **bpftool prog profile** or showing pids + associated to BPF objects may rely on it). -j, --json Generate JSON output. For commands that cannot produce JSON, this @@ -23,12 +23,3 @@ Print all logs available, even debug-level information. This includes logs from libbpf as well as from the verifier, when attempting to load programs. - --l, --legacy - Use legacy libbpf mode which has more relaxed BPF program - requirements. By default, bpftool has more strict requirements - about section names, changes pinning logic and doesn't support - some of the older non-BTF map declarations. - - See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 - for details. diff --git a/tools/bpf/bpftool/Documentation/substitutions.rst b/tools/bpf/bpftool/Documentation/substitutions.rst index ccf1ffa0686c..827e3ffb1766 100644 --- a/tools/bpf/bpftool/Documentation/substitutions.rst +++ b/tools/bpf/bpftool/Documentation/substitutions.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** } +.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4a95c017ad4c..787b857d3fb5 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -93,11 +93,22 @@ INSTALL ?= install RM ?= rm -f FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd libbfd-liberty libbfd-liberty-z \ - disassembler-four-args disassembler-init-styled libcap \ - clang-bpf-co-re -FEATURE_DISPLAY = libbfd libbfd-liberty libbfd-liberty-z \ - libcap clang-bpf-co-re + +FEATURE_TESTS := clang-bpf-co-re +FEATURE_TESTS += llvm +FEATURE_TESTS += libcap +FEATURE_TESTS += libbfd +FEATURE_TESTS += libbfd-liberty +FEATURE_TESTS += libbfd-liberty-z +FEATURE_TESTS += disassembler-four-args +FEATURE_TESTS += disassembler-init-styled + +FEATURE_DISPLAY := clang-bpf-co-re +FEATURE_DISPLAY += llvm +FEATURE_DISPLAY += libcap +FEATURE_DISPLAY += libbfd +FEATURE_DISPLAY += libbfd-liberty +FEATURE_DISPLAY += libbfd-liberty-z check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -115,13 +126,6 @@ include $(FEATURES_DUMP) endif endif -ifeq ($(feature-disassembler-four-args), 1) -CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE -endif -ifeq ($(feature-disassembler-init-styled), 1) - CFLAGS += -DDISASM_INIT_STYLED -endif - LIBS = $(LIBBPF) -lelf -lz LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz ifeq ($(feature-libcap), 1) @@ -133,21 +137,41 @@ include $(wildcard $(OUTPUT)*.d) all: $(OUTPUT)bpftool -BFD_SRCS = jit_disasm.c +SRCS := $(wildcard *.c) -SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) - -ifeq ($(feature-libbfd),1) - LIBS += -lbfd -ldl -lopcodes -else ifeq ($(feature-libbfd-liberty),1) - LIBS += -lbfd -ldl -lopcodes -liberty -else ifeq ($(feature-libbfd-liberty-z),1) - LIBS += -lbfd -ldl -lopcodes -liberty -lz +ifeq ($(feature-llvm),1) + # If LLVM is available, use it for JIT disassembly + CFLAGS += -DHAVE_LLVM_SUPPORT + LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets + CFLAGS += $(shell $(LLVM_CONFIG) --cflags --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) +else + # Fall back on libbfd + ifeq ($(feature-libbfd),1) + LIBS += -lbfd -ldl -lopcodes + else ifeq ($(feature-libbfd-liberty),1) + LIBS += -lbfd -ldl -lopcodes -liberty + else ifeq ($(feature-libbfd-liberty-z),1) + LIBS += -lbfd -ldl -lopcodes -liberty -lz + endif + + # If one of the above feature combinations is set, we support libbfd + ifneq ($(filter -lbfd,$(LIBS)),) + CFLAGS += -DHAVE_LIBBFD_SUPPORT + + # Libbfd interface changed over time, figure out what we need + ifeq ($(feature-disassembler-four-args), 1) + CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE + endif + ifeq ($(feature-disassembler-init-styled), 1) + CFLAGS += -DDISASM_INIT_STYLED + endif + endif endif - -ifneq ($(filter -lbfd,$(LIBS)),) -CFLAGS += -DHAVE_LIBBFD_SUPPORT -SRCS += $(BFD_SRCS) +ifeq ($(filter -DHAVE_LLVM_SUPPORT -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) + # No support for JIT disassembly + SRCS := $(filter-out jit_disasm.c,$(SRCS)) endif HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index dc1641e3670e..35f26f7c1124 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -261,7 +261,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty --bpffs --mapcompat --debug \ - --use-loader --base-btf --legacy' + --use-loader --base-btf' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -505,6 +505,7 @@ _bpftool() _bpftool_once_attr 'type' _bpftool_once_attr 'dev' _bpftool_once_attr 'pinmaps' + _bpftool_once_attr 'autoattach' return 0 ;; esac diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 68a70ac03c80..352290ba7b29 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -467,9 +467,8 @@ static int dump_btf_c(const struct btf *btf, int err = 0, i; d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + if (!d) + return -errno; printf("#ifndef __VMLINUX_H__\n"); printf("#define __VMLINUX_H__\n"); @@ -512,11 +511,9 @@ static struct btf *get_vmlinux_btf_from_sysfs(void) struct btf *base; base = btf__parse(sysfs_vmlinux, NULL); - if (libbpf_get_error(base)) { - p_err("failed to parse vmlinux BTF at '%s': %ld\n", - sysfs_vmlinux, libbpf_get_error(base)); - base = NULL; - } + if (!base) + p_err("failed to parse vmlinux BTF at '%s': %d\n", + sysfs_vmlinux, -errno); return base; } @@ -559,7 +556,7 @@ static int do_dump(int argc, char **argv) __u32 btf_id = -1; const char *src; int fd = -1; - int err; + int err = 0; if (!REQ_ARGS(2)) { usage(); @@ -634,8 +631,8 @@ static int do_dump(int argc, char **argv) base = get_vmlinux_btf_from_sysfs(); btf = btf__parse_split(*argv, base ?: base_btf); - err = libbpf_get_error(btf); if (!btf) { + err = -errno; p_err("failed to load BTF from %s: %s", *argv, strerror(errno)); goto done; @@ -681,8 +678,8 @@ static int do_dump(int argc, char **argv) } btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); - err = libbpf_get_error(btf); if (!btf) { + err = -errno; p_err("get btf by id (%u): %s", btf_id, strerror(errno)); goto done; } @@ -815,8 +812,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, if (!btf_id) continue; - err = hashmap__append(tab, u32_as_hash_field(btf_id), - u32_as_hash_field(id)); + err = hashmap__append(tab, btf_id, id); if (err) { p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s", btf_id, id, strerror(-err)); @@ -875,17 +871,13 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("size %uB", info->btf_size); n = 0; - hashmap__for_each_key_entry(btf_prog_table, entry, - u32_as_hash_field(info->id)) { - printf("%s%u", n++ == 0 ? " prog_ids " : ",", - hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_prog_table, entry, info->id) { + printf("%s%lu", n++ == 0 ? " prog_ids " : ",", entry->value); } n = 0; - hashmap__for_each_key_entry(btf_map_table, entry, - u32_as_hash_field(info->id)) { - printf("%s%u", n++ == 0 ? " map_ids " : ",", - hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_map_table, entry, info->id) { + printf("%s%lu", n++ == 0 ? " map_ids " : ",", entry->value); } emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); @@ -907,17 +899,15 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_name(json_wtr, "prog_ids"); jsonw_start_array(json_wtr); /* prog_ids */ - hashmap__for_each_key_entry(btf_prog_table, entry, - u32_as_hash_field(info->id)) { - jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_prog_table, entry, info->id) { + jsonw_uint(json_wtr, entry->value); } jsonw_end_array(json_wtr); /* prog_ids */ jsonw_name(json_wtr, "map_ids"); jsonw_start_array(json_wtr); /* map_ids */ - hashmap__for_each_key_entry(btf_map_table, entry, - u32_as_hash_field(info->id)) { - jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_map_table, entry, info->id) { + jsonw_uint(json_wtr, entry->value); } jsonw_end_array(json_wtr); /* map_ids */ diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 19924b6ce796..eda71fdfe95a 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -75,7 +75,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, goto print; prog_btf = btf__load_from_kernel_by_id(info.btf_id); - if (libbpf_get_error(prog_btf)) + if (!prog_btf) goto print; func_type = btf__type_by_id(prog_btf, finfo.type_id); if (!func_type || !btf_is_func(func_type)) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 0cdb4f711510..620032042576 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <ctype.h> #include <errno.h> #include <fcntl.h> @@ -495,10 +497,11 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, goto out_close; } - err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path); + err = hashmap__append(build_fn_table, pinned_info.id, path); if (err) { p_err("failed to append entry to hashmap for ID %u, path '%s': %s", pinned_info.id, path, strerror(errno)); + free(path); goto out_close; } @@ -546,7 +549,7 @@ void delete_pinned_obj_table(struct hashmap *map) return; hashmap__for_each_entry(map, entry, bkt) - free(entry->value); + free(entry->pvalue); hashmap__free(map); } @@ -628,12 +631,11 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) } const char * -ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, - const char **opt) +ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt) { + __maybe_unused int device_id; char devname[IF_NAMESIZE]; int vendor_id; - int device_id; if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { p_err("Can't get net device name for ifindex %d: %s", ifindex, @@ -648,6 +650,7 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, } switch (vendor_id) { +#ifdef HAVE_LIBBFD_SUPPORT case 0x19ee: device_id = read_sysfs_netdev_hex_int(devname, "device"); if (device_id != 0x4000 && @@ -656,8 +659,10 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); *opt = "ctx4"; return "NFP-6xxx"; +#endif /* HAVE_LIBBFD_SUPPORT */ + /* No NFP support in LLVM, we have no valid triple to return. */ default: - p_err("Can't get bfd arch name for device vendor id 0x%04x", + p_err("Can't get arch name for device vendor id 0x%04x", vendor_id); return NULL; } @@ -1040,12 +1045,12 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } -size_t hash_fn_for_key_as_id(const void *key, void *ctx) +size_t hash_fn_for_key_as_id(long key, void *ctx) { - return (size_t)key; + return key; } -bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) +bool equal_fn_for_key_as_id(long k1, long k2, void *ctx) { return k1 == k2; } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index cf8b4e525c88..2883660d6b67 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -252,9 +252,8 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) int err = 0; d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + if (!d) + return -errno; bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ @@ -976,13 +975,12 @@ static int do_skeleton(int argc, char **argv) /* log_level1 + log_level2 + stats, but not stable UAPI */ opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_mem(obj_data, file_sz, &opts); - err = libbpf_get_error(obj); - if (err) { + if (!obj) { char err_buf[256]; + err = -errno; libbpf_strerror(err, err_buf, sizeof(err_buf)); p_err("failed to open BPF object file: %s", err_buf); - obj = NULL; goto out; } @@ -1660,21 +1658,16 @@ struct btfgen_info { struct btf *marked_btf; /* btf structure used to mark used types */ }; -static size_t btfgen_hash_fn(const void *key, void *ctx) +static size_t btfgen_hash_fn(long key, void *ctx) { - return (size_t)key; + return key; } -static bool btfgen_equal_fn(const void *k1, const void *k2, void *ctx) +static bool btfgen_equal_fn(long k1, long k2, void *ctx) { return k1 == k2; } -static void *u32_as_hash_key(__u32 x) -{ - return (void *)(uintptr_t)x; -} - static void btfgen_free_info(struct btfgen_info *info) { if (!info) @@ -2086,18 +2079,18 @@ static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path) struct bpf_core_spec specs_scratch[3] = {}; struct bpf_core_relo_res targ_res = {}; struct bpf_core_cand_list *cands = NULL; - const void *type_key = u32_as_hash_key(relo->type_id); const char *sec_name = btf__name_by_offset(btf, sec->sec_name_off); if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && - !hashmap__find(cand_cache, type_key, (void **)&cands)) { + !hashmap__find(cand_cache, relo->type_id, &cands)) { cands = btfgen_find_cands(btf, info->src_btf, relo->type_id); if (!cands) { err = -errno; goto out; } - err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); + err = hashmap__set(cand_cache, relo->type_id, cands, + NULL, NULL); if (err) goto out; } @@ -2120,7 +2113,7 @@ out: if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { - bpf_core_free_cands(entry->value); + bpf_core_free_cands(entry->pvalue); } hashmap__free(cand_cache); } diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index f88fdc820d23..9a1d2365a297 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -1,7 +1,10 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2020 Facebook +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif +#include <errno.h> #include <unistd.h> #include <linux/err.h> #include <bpf/libbpf.h> @@ -46,8 +49,8 @@ static int do_pin(int argc, char **argv) } obj = bpf_object__open(objfile); - err = libbpf_get_error(obj); - if (err) { + if (!obj) { + err = -errno; p_err("can't open objfile %s", objfile); goto close_map_fd; } @@ -60,13 +63,14 @@ static int do_pin(int argc, char **argv) prog = bpf_object__next_program(obj, NULL); if (!prog) { + err = -errno; p_err("can't find bpf program in objfile %s", objfile); goto close_obj; } link = bpf_program__attach_iter(prog, &iter_opts); - err = libbpf_get_error(link); - if (err) { + if (!link) { + err = -errno; p_err("attach_iter failed for program %s", bpf_program__name(prog)); goto close_obj; diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index aaf99a0168c9..7b8d9ec89ebd 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -11,35 +11,151 @@ * Licensed under the GNU General Public License, version 2.0 (GPLv2) */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <stdarg.h> #include <stdint.h> #include <stdlib.h> -#include <assert.h> #include <unistd.h> #include <string.h> -#include <bfd.h> -#include <dis-asm.h> #include <sys/stat.h> #include <limits.h> #include <bpf/libbpf.h> + +#ifdef HAVE_LLVM_SUPPORT +#include <llvm-c/Core.h> +#include <llvm-c/Disassembler.h> +#include <llvm-c/Target.h> +#include <llvm-c/TargetMachine.h> +#endif + +#ifdef HAVE_LIBBFD_SUPPORT +#include <bfd.h> +#include <dis-asm.h> #include <tools/dis-asm-compat.h> +#endif #include "json_writer.h" #include "main.h" -static void get_exec_path(char *tpath, size_t size) +static int oper_count; + +#ifdef HAVE_LLVM_SUPPORT +#define DISASM_SPACER + +typedef LLVMDisasmContextRef disasm_ctx_t; + +static int printf_json(char *s) +{ + s = strtok(s, " \t"); + jsonw_string_field(json_wtr, "operation", s); + + jsonw_name(json_wtr, "operands"); + jsonw_start_array(json_wtr); + oper_count = 1; + + while ((s = strtok(NULL, " \t,()")) != 0) { + jsonw_string(json_wtr, s); + oper_count++; + } + return 0; +} + +/* This callback to set the ref_type is necessary to have the LLVM disassembler + * print PC-relative addresses instead of byte offsets for branch instruction + * targets. + */ +static const char * +symbol_lookup_callback(__maybe_unused void *disasm_info, + __maybe_unused uint64_t ref_value, + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, + __maybe_unused const char **ref_name) +{ + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; + return NULL; +} + +static int +init_context(disasm_ctx_t *ctx, const char *arch, + __maybe_unused const char *disassembler_options, + __maybe_unused unsigned char *image, __maybe_unused ssize_t len) +{ + char *triple; + + if (arch) + triple = LLVMNormalizeTargetTriple(arch); + else + triple = LLVMGetDefaultTargetTriple(); + if (!triple) { + p_err("Failed to retrieve triple"); + return -1; + } + *ctx = LLVMCreateDisasm(triple, NULL, 0, NULL, symbol_lookup_callback); + LLVMDisposeMessage(triple); + + if (!*ctx) { + p_err("Failed to create disassembler"); + return -1; + } + + return 0; +} + +static void destroy_context(disasm_ctx_t *ctx) +{ + LLVMDisposeMessage(*ctx); +} + +static int +disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) +{ + char buf[256]; + int count; + + count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc, + buf, sizeof(buf)); + if (json_output) + printf_json(buf); + else + printf("%s", buf); + + return count; +} + +int disasm_init(void) +{ + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllDisassemblers(); + return 0; +} +#endif /* HAVE_LLVM_SUPPORT */ + +#ifdef HAVE_LIBBFD_SUPPORT +#define DISASM_SPACER "\t" + +typedef struct { + struct disassemble_info *info; + disassembler_ftype disassemble; + bfd *bfdf; +} disasm_ctx_t; + +static int get_exec_path(char *tpath, size_t size) { const char *path = "/proc/self/exe"; ssize_t len; len = readlink(path, tpath, size - 1); - assert(len > 0); + if (len <= 0) + return -1; + tpath[len] = 0; + + return 0; } -static int oper_count; static int printf_json(void *out, const char *fmt, va_list ap) { char *s; @@ -97,37 +213,44 @@ static int fprintf_json_styled(void *out, return r; } -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum) +static int init_context(disasm_ctx_t *ctx, const char *arch, + const char *disassembler_options, + unsigned char *image, ssize_t len) { - const struct bpf_line_info *linfo = NULL; - disassembler_ftype disassemble; - struct disassemble_info info; - unsigned int nr_skip = 0; - int count, i, pc = 0; + struct disassemble_info *info; char tpath[PATH_MAX]; bfd *bfdf; - if (!len) - return; - memset(tpath, 0, sizeof(tpath)); - get_exec_path(tpath, sizeof(tpath)); + if (get_exec_path(tpath, sizeof(tpath))) { + p_err("failed to create disassembler (get_exec_path)"); + return -1; + } - bfdf = bfd_openr(tpath, NULL); - assert(bfdf); - assert(bfd_check_format(bfdf, bfd_object)); + ctx->bfdf = bfd_openr(tpath, NULL); + if (!ctx->bfdf) { + p_err("failed to create disassembler (bfd_openr)"); + return -1; + } + if (!bfd_check_format(ctx->bfdf, bfd_object)) { + p_err("failed to create disassembler (bfd_check_format)"); + goto err_close; + } + bfdf = ctx->bfdf; + + ctx->info = malloc(sizeof(struct disassemble_info)); + if (!ctx->info) { + p_err("mem alloc failed"); + goto err_close; + } + info = ctx->info; if (json_output) - init_disassemble_info_compat(&info, stdout, + init_disassemble_info_compat(info, stdout, (fprintf_ftype) fprintf_json, fprintf_json_styled); else - init_disassemble_info_compat(&info, stdout, + init_disassemble_info_compat(info, stdout, (fprintf_ftype) fprintf, fprintf_styled); @@ -139,28 +262,77 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, bfdf->arch_info = inf; } else { p_err("No libbfd support for %s", arch); - return; + goto err_free; } } - info.arch = bfd_get_arch(bfdf); - info.mach = bfd_get_mach(bfdf); + info->arch = bfd_get_arch(bfdf); + info->mach = bfd_get_mach(bfdf); if (disassembler_options) - info.disassembler_options = disassembler_options; - info.buffer = image; - info.buffer_length = len; + info->disassembler_options = disassembler_options; + info->buffer = image; + info->buffer_length = len; - disassemble_init_for_target(&info); + disassemble_init_for_target(info); #ifdef DISASM_FOUR_ARGS_SIGNATURE - disassemble = disassembler(info.arch, - bfd_big_endian(bfdf), - info.mach, - bfdf); + ctx->disassemble = disassembler(info->arch, + bfd_big_endian(bfdf), + info->mach, + bfdf); #else - disassemble = disassembler(bfdf); + ctx->disassemble = disassembler(bfdf); #endif - assert(disassemble); + if (!ctx->disassemble) { + p_err("failed to create disassembler"); + goto err_free; + } + return 0; + +err_free: + free(info); +err_close: + bfd_close(ctx->bfdf); + return -1; +} + +static void destroy_context(disasm_ctx_t *ctx) +{ + free(ctx->info); + bfd_close(ctx->bfdf); +} + +static int +disassemble_insn(disasm_ctx_t *ctx, __maybe_unused unsigned char *image, + __maybe_unused ssize_t len, int pc) +{ + return ctx->disassemble(pc, ctx->info); +} + +int disasm_init(void) +{ + bfd_init(); + return 0; +} +#endif /* HAVE_LIBBPFD_SUPPORT */ + +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) +{ + const struct bpf_line_info *linfo = NULL; + unsigned int nr_skip = 0; + int count, i, pc = 0; + disasm_ctx_t ctx; + + if (!len) + return -1; + + if (init_context(&ctx, arch, disassembler_options, image, len)) + return -1; if (json_output) jsonw_start_array(json_wtr); @@ -185,10 +357,11 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (linfo) btf_dump_linfo_plain(btf, linfo, "; ", linum); - printf("%4x:\t", pc); + printf("%4x:" DISASM_SPACER, pc); } - count = disassemble(pc, &info); + count = disassemble_insn(&ctx, image, len, pc); + if (json_output) { /* Operand array, was started in fprintf_json. Before * that, make sure we have a _null_ value if no operand @@ -224,11 +397,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (json_output) jsonw_end_array(json_wtr); - bfd_close(bfdf); -} + destroy_context(&ctx); -int disasm_init(void) -{ - bfd_init(); return 0; } diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2863639706dd..6f4cfe01cad4 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -204,9 +204,8 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(link_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hashmap__for_each_key_entry(link_table, entry, info->id) + jsonw_string(json_wtr, entry->pvalue); jsonw_end_array(json_wtr); } @@ -309,9 +308,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) if (!hashmap__empty(link_table)) { struct hashmap_entry *entry; - hashmap__for_each_key_entry(link_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hashmap__for_each_key_entry(link_table, entry, info->id) + printf("\n\tpinned %s", (char *)entry->pvalue); } emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index ccd7457f92bf..08d0ac543c67 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,7 +31,6 @@ bool block_mount; bool verifier_logs; bool relaxed_maps; bool use_loader; -bool legacy_libbpf; struct btf *base_btf; struct hashmap *refs_table; @@ -71,6 +70,27 @@ static int do_help(int argc, char **argv) return 0; } +static int do_batch(int argc, char **argv); +static int do_version(int argc, char **argv); + +static const struct cmd commands[] = { + { "help", do_help }, + { "batch", do_batch }, + { "prog", do_prog }, + { "map", do_map }, + { "link", do_link }, + { "cgroup", do_cgroup }, + { "perf", do_perf }, + { "net", do_net }, + { "feature", do_feature }, + { "btf", do_btf }, + { "gen", do_gen }, + { "struct_ops", do_struct_ops }, + { "iter", do_iter }, + { "version", do_version }, + { 0 } +}; + #ifndef BPFTOOL_VERSION /* bpftool's major and minor version numbers are aligned on libbpf's. There is * an offset of 6 for the version number, because bpftool's version was higher @@ -82,6 +102,15 @@ static int do_help(int argc, char **argv) #define BPFTOOL_PATCH_VERSION 0 #endif +static void +print_feature(const char *feature, bool state, unsigned int *nb_features) +{ + if (state) { + printf("%s %s", *nb_features ? "," : "", feature); + *nb_features = *nb_features + 1; + } +} + static int do_version(int argc, char **argv) { #ifdef HAVE_LIBBFD_SUPPORT @@ -89,11 +118,28 @@ static int do_version(int argc, char **argv) #else const bool has_libbfd = false; #endif +#ifdef HAVE_LLVM_SUPPORT + const bool has_llvm = true; +#else + const bool has_llvm = false; +#endif #ifdef BPFTOOL_WITHOUT_SKELETONS const bool has_skeletons = false; #else const bool has_skeletons = true; #endif + bool bootstrap = false; + int i; + + for (i = 0; commands[i].cmd; i++) { + if (!strcmp(commands[i].cmd, "prog")) { + /* Assume we run a bootstrap version if "bpftool prog" + * is not available. + */ + bootstrap = !commands[i].func; + break; + } + } if (json_output) { jsonw_start_object(json_wtr); /* root object */ @@ -112,8 +158,9 @@ static int do_version(int argc, char **argv) jsonw_name(json_wtr, "features"); jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); - jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); + jsonw_bool_field(json_wtr, "llvm", has_llvm); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); + jsonw_bool_field(json_wtr, "bootstrap", bootstrap); jsonw_end_object(json_wtr); /* features */ jsonw_end_object(json_wtr); /* root object */ @@ -128,16 +175,10 @@ static int do_version(int argc, char **argv) #endif printf("using libbpf %s\n", libbpf_version_string()); printf("features:"); - if (has_libbfd) { - printf(" libbfd"); - nb_features++; - } - if (!legacy_libbpf) { - printf("%s libbpf_strict", nb_features++ ? "," : ""); - nb_features++; - } - if (has_skeletons) - printf("%s skeletons", nb_features++ ? "," : ""); + print_feature("libbfd", has_libbfd, &nb_features); + print_feature("llvm", has_llvm, &nb_features); + print_feature("skeletons", has_skeletons, &nb_features); + print_feature("bootstrap", bootstrap, &nb_features); printf("\n"); } return 0; @@ -279,26 +320,6 @@ static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb) return n_argc; } -static int do_batch(int argc, char **argv); - -static const struct cmd cmds[] = { - { "help", do_help }, - { "batch", do_batch }, - { "prog", do_prog }, - { "map", do_map }, - { "link", do_link }, - { "cgroup", do_cgroup }, - { "perf", do_perf }, - { "net", do_net }, - { "feature", do_feature }, - { "btf", do_btf }, - { "gen", do_gen }, - { "struct_ops", do_struct_ops }, - { "iter", do_iter }, - { "version", do_version }, - { 0 } -}; - static int do_batch(int argc, char **argv) { char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX]; @@ -313,12 +334,12 @@ static int do_batch(int argc, char **argv) if (argc < 2) { p_err("too few parameters for batch"); return -1; - } else if (!is_prefix(*argv, "file")) { - p_err("expected 'file', got: %s", *argv); - return -1; } else if (argc > 2) { p_err("too many parameters for batch"); return -1; + } else if (!is_prefix(*argv, "file")) { + p_err("expected 'file', got: %s", *argv); + return -1; } NEXT_ARG(); @@ -386,7 +407,7 @@ static int do_batch(int argc, char **argv) jsonw_name(json_wtr, "output"); } - err = cmd_select(cmds, n_argc, n_argv, do_help); + err = cmd_select(commands, n_argc, n_argv, do_help); if (json_output) jsonw_end_object(json_wtr); @@ -427,7 +448,6 @@ int main(int argc, char **argv) { "debug", no_argument, NULL, 'd' }, { "use-loader", no_argument, NULL, 'L' }, { "base-btf", required_argument, NULL, 'B' }, - { "legacy", no_argument, NULL, 'l' }, { 0 } }; bool version_requested = false; @@ -450,7 +470,7 @@ int main(int argc, char **argv) json_output = false; show_pinned = false; block_mount = false; - bin_name = argv[0]; + bin_name = "bpftool"; opterr = 0; while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", @@ -490,19 +510,15 @@ int main(int argc, char **argv) break; case 'B': base_btf = btf__parse(optarg, NULL); - if (libbpf_get_error(base_btf)) { - p_err("failed to parse base BTF at '%s': %ld\n", - optarg, libbpf_get_error(base_btf)); - base_btf = NULL; + if (!base_btf) { + p_err("failed to parse base BTF at '%s': %d\n", + optarg, -errno); return -1; } break; case 'L': use_loader = true; break; - case 'l': - legacy_libbpf = true; - break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -512,14 +528,6 @@ int main(int argc, char **argv) } } - if (!legacy_libbpf) { - /* Allow legacy map definitions for skeleton generation. - * It will still be rejected if users use LIBBPF_STRICT_ALL - * mode for loading generated skeleton. - */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); - } - argc -= optind; argv += optind; if (argc < 0) @@ -528,7 +536,7 @@ int main(int argc, char **argv) if (version_requested) return do_version(argc, argv); - ret = cmd_select(cmds, argc, argv, do_help); + ret = cmd_select(commands, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 5e5060c2ac04..a84224b6a604 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ @@ -82,7 +82,6 @@ extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; -extern bool legacy_libbpf; extern struct btf *base_btf; extern struct hashmap *refs_table; @@ -172,27 +171,28 @@ int map_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); struct bpf_prog_linfo; -#ifdef HAVE_LIBBFD_SUPPORT -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum); +#if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT) +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum); int disasm_init(void); #else static inline -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum) +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) { + return 0; } static inline int disasm_init(void) { - p_err("No libbfd support"); + p_err("No JIT disassembly support"); return -1; } #endif @@ -202,8 +202,7 @@ void print_hex_data_json(uint8_t *data, size_t len); unsigned int get_page_size(void); unsigned int get_possible_cpus(void); const char * -ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, - const char **opt); +ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt); struct btf_dumper { const struct btf *btf; @@ -240,8 +239,8 @@ int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, int print_all_levels(__maybe_unused enum libbpf_print_level level, const char *format, va_list args); -size_t hash_fn_for_key_as_id(const void *key, void *ctx); -bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); +size_t hash_fn_for_key_as_id(long key, void *ctx); +bool equal_fn_for_key_as_id(long k1, long k2, void *ctx); /* bpf_attach_type_input_str - convert the provided attach type value into a * textual representation that we accept for input purposes. @@ -257,16 +256,6 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); */ const char *bpf_attach_type_input_str(enum bpf_attach_type t); -static inline void *u32_as_hash_field(__u32 x) -{ - return (void *)(uintptr_t)x; -} - -static inline __u32 hash_field_as_u32(const void *x) -{ - return (__u32)(uintptr_t)x; -} - static inline bool hashmap__empty(struct hashmap *map) { return map ? hashmap__size(map) == 0 : true; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 9a6ca9f31133..88911d3aa2d9 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ -#include <assert.h> #include <errno.h> #include <fcntl.h> #include <linux/err.h> @@ -519,9 +518,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(map_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hashmap__for_each_key_entry(map_table, entry, info->id) + jsonw_string(json_wtr, entry->pvalue); jsonw_end_array(json_wtr); } @@ -596,9 +594,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (!hashmap__empty(map_table)) { struct hashmap_entry *entry; - hashmap__for_each_key_entry(map_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hashmap__for_each_key_entry(map_table, entry, info->id) + printf("\n\tpinned %s", (char *)entry->pvalue); } if (frozen_str) { @@ -789,18 +786,18 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - err = libbpf_get_error(btf_vmlinux); - if (err) { + if (!btf_vmlinux) { p_err("failed to get kernel btf"); - return err; + return -errno; } } *btf = btf_vmlinux; } else if (info->btf_value_type_id) { *btf = btf__load_from_kernel_by_id(info->btf_id); - err = libbpf_get_error(*btf); - if (err) + if (!*btf) { + err = -errno; p_err("failed to get btf"); + } } else { *btf = NULL; } @@ -810,16 +807,10 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) static void free_map_kv_btf(struct btf *btf) { - if (!libbpf_get_error(btf) && btf != btf_vmlinux) + if (btf != btf_vmlinux) btf__free(btf); } -static void free_btf_vmlinux(void) -{ - if (!libbpf_get_error(btf_vmlinux)) - btf__free(btf_vmlinux); -} - static int map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, bool show_header) @@ -956,7 +947,7 @@ exit_close: close(fds[i]); exit_free: free(fds); - free_btf_vmlinux(); + btf__free(btf_vmlinux); return err; } @@ -1459,7 +1450,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter | user_ringbuf }\n" + " task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 526a332c48e6..c40e44c938ae 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <errno.h> #include <fcntl.h> #include <stdlib.h> diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index 226ec2c39052..91743445e4c7 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -2,7 +2,9 @@ // Copyright (C) 2018 Facebook // Author: Yonghong Song <yhs@fb.com> +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <ctype.h> #include <errno.h> #include <fcntl.h> diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index bb6c969a114a..00c77edb6331 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -36,8 +36,8 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) int err, i; void *tmp; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) { - refs = entry->value; + hashmap__for_each_key_entry(map, entry, e->id) { + refs = entry->pvalue; for (i = 0; i < refs->ref_cnt; i++) { if (refs->refs[i].pid == e->pid) @@ -81,7 +81,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) refs->has_bpf_cookie = e->has_bpf_cookie; refs->bpf_cookie = e->bpf_cookie; - err = hashmap__append(map, u32_as_hash_field(e->id), refs); + err = hashmap__append(map, e->id, refs); if (err) p_err("failed to append entry to hashmap for ID %u: %s", e->id, strerror(errno)); @@ -183,7 +183,7 @@ void delete_obj_refs_table(struct hashmap *map) return; hashmap__for_each_entry(map, entry, bkt) { - struct obj_refs *refs = entry->value; + struct obj_refs *refs = entry->pvalue; free(refs->refs); free(refs); @@ -200,8 +200,8 @@ void emit_obj_refs_json(struct hashmap *map, __u32 id, if (hashmap__empty(map)) return; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { - struct obj_refs *refs = entry->value; + hashmap__for_each_key_entry(map, entry, id) { + struct obj_refs *refs = entry->pvalue; int i; if (refs->ref_cnt == 0) @@ -232,8 +232,8 @@ void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) if (hashmap__empty(map)) return; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { - struct obj_refs *refs = entry->value; + hashmap__for_each_key_entry(map, entry, id) { + struct obj_refs *refs = entry->pvalue; int i; if (refs->ref_cnt == 0) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index c81362a001ba..cfc9fdc1e863 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <errno.h> #include <fcntl.h> #include <signal.h> @@ -320,7 +322,7 @@ static void show_prog_metadata(int fd, __u32 num_maps) return; btf = btf__load_from_kernel_by_id(map_info.btf_id); - if (libbpf_get_error(btf)) + if (!btf) goto out_free; t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); @@ -484,9 +486,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(prog_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hashmap__for_each_key_entry(prog_table, entry, info->id) + jsonw_string(json_wtr, entry->pvalue); jsonw_end_array(json_wtr); } @@ -559,9 +560,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (!hashmap__empty(prog_table)) { struct hashmap_entry *entry; - hashmap__for_each_key_entry(prog_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hashmap__for_each_key_entry(prog_table, entry, info->id) + printf("\n\tpinned %s", (char *)entry->pvalue); } if (info->btf_id) @@ -726,7 +726,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (info->btf_id) { btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { + if (!btf) { p_err("failed to get btf"); return -1; } @@ -762,10 +762,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, const char *name = NULL; if (info->ifindex) { - name = ifindex_to_bfd_params(info->ifindex, - info->netns_dev, - info->netns_ino, - &disasm_opt); + name = ifindex_to_arch(info->ifindex, info->netns_dev, + info->netns_ino, &disasm_opt); if (!name) goto exit_free; } @@ -820,10 +818,11 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, - name, disasm_opt, btf, - prog_linfo, ksyms[i], i, - linum); + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum)) + goto exit_free; img += lens[i]; @@ -836,8 +835,10 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (json_output) jsonw_end_array(json_wtr); } else { - disasm_print_insn(buf, member_len, opcodes, name, - disasm_opt, btf, NULL, 0, 0, false); + if (disasm_print_insn(buf, member_len, opcodes, name, + disasm_opt, btf, NULL, 0, 0, + false)) + goto exit_free; } } else if (visual) { if (json_output) @@ -1453,6 +1454,67 @@ get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return ret; } +static int +auto_attach_program(struct bpf_program *prog, const char *path) +{ + struct bpf_link *link; + int err; + + link = bpf_program__attach(prog); + if (!link) { + p_info("Program %s does not support autoattach, falling back to pinning", + bpf_program__name(prog)); + return bpf_obj_pin(bpf_program__fd(prog), path); + } + + err = bpf_link__pin(link, path); + bpf_link__destroy(link); + return err; +} + +static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); + if (len < 0) + return -EINVAL; + if ((size_t)len >= buf_sz) + return -ENAMETOOLONG; + + return 0; +} + +static int +auto_attach_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + char buf[PATH_MAX]; + int err; + + bpf_object__for_each_program(prog, obj) { + err = pathname_concat(buf, sizeof(buf), path, bpf_program__name(prog)); + if (err) + goto err_unpin_programs; + + err = auto_attach_program(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_object__prev_program(obj, prog))) { + if (pathname_concat(buf, sizeof(buf), path, bpf_program__name(prog))) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; @@ -1464,6 +1526,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; const char *pinmaps = NULL; + bool auto_attach = false; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; @@ -1583,6 +1646,9 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_free_reuse_maps; pinmaps = GET_ARG(); + } else if (is_prefix(*argv, "autoattach")) { + auto_attach = true; + NEXT_ARG(); } else { p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", *argv); @@ -1597,7 +1663,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) { + if (!obj) { p_err("failed to open object file"); goto err_free_reuse_maps; } @@ -1692,14 +1758,20 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = bpf_obj_pin(bpf_program__fd(prog), pinfile); + if (auto_attach) + err = auto_attach_program(prog, pinfile); + else + err = bpf_obj_pin(bpf_program__fd(prog), pinfile); if (err) { p_err("failed to pin program %s", bpf_program__section_name(prog)); goto err_close_obj; } } else { - err = bpf_object__pin_programs(obj, pinfile); + if (auto_attach) + err = auto_attach_programs(obj, pinfile); + else + err = bpf_object__pin_programs(obj, pinfile); if (err) { p_err("failed to pin all programs"); goto err_close_obj; @@ -1730,11 +1802,6 @@ err_unpin: else bpf_object__unpin_programs(obj, pinfile); err_close_obj: - if (!legacy_libbpf) { - p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n" - "If it used to work for this object file but now doesn't, see --legacy option for more details.\n"); - } - bpf_object__close(obj); err_free_reuse_maps: for (i = 0; i < old_map_fds; i++) @@ -1815,7 +1882,7 @@ static int do_loader(int argc, char **argv) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) { + if (!obj) { p_err("failed to open object file"); goto err_close_obj; } @@ -2132,7 +2199,7 @@ static char *profile_target_name(int tgt_fd) } btf = btf__load_from_kernel_by_id(info.btf_id); - if (libbpf_get_error(btf)) { + if (!btf) { p_err("failed to load btf for prog FD %d", tgt_fd); goto out; } @@ -2338,6 +2405,7 @@ static int do_help(int argc, char **argv) " [type TYPE] [dev NAME] \\\n" " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" + " [autoattach]\n" " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s run PROG \\\n" diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index e08a6ff2866c..903b80ff4e9a 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void) return btf_vmlinux; btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + if (!btf_vmlinux) p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); return btf_vmlinux; @@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) const char *st_ops_name; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (!kern_btf) return "<btf_vmlinux_not_found>"; t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); @@ -63,10 +63,8 @@ static __s32 get_map_info_type_id(void) return map_info_type_id; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) { - map_info_type_id = PTR_ERR(kern_btf); - return map_info_type_id; - } + if (!kern_btf) + return 0; map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info", BTF_KIND_STRUCT); @@ -415,7 +413,7 @@ static int do_dump(int argc, char **argv) } kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (!kern_btf) return -1; if (!json_output) { @@ -498,7 +496,7 @@ static int do_register(int argc, char **argv) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) + if (!obj) return -1; set_max_rlimit(); @@ -513,10 +511,9 @@ static int do_register(int argc, char **argv) continue; link = bpf_map__attach_struct_ops(map); - if (libbpf_get_error(link)) { + if (!link) { p_err("can't register struct_ops %s: %s", - bpf_map__name(map), - strerror(-PTR_ERR(link))); + bpf_map__name(map), strerror(errno)); nr_errs++; continue; } @@ -593,8 +590,7 @@ int do_struct_ops(int argc, char **argv) err = cmd_select(cmds, argc, argv, do_help); - if (!libbpf_get_error(btf_vmlinux)) - btf__free(btf_vmlinux); + btf__free(btf_vmlinux); return err; } diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 2d9cd6a7b3c8..6fe3134ae45d 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdarg.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 4c1966f7c77a..9b3c528bab92 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -4,6 +4,7 @@ #include <linux/compiler.h> #include <linux/types.h> +#include <linux/bitops.h> /* * Atomic operations that C can't guarantee us. Useful for @@ -69,4 +70,26 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval) return cmpxchg(&(v)->counter, oldval, newval); } +static inline int test_and_set_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + long old; + + addr += BIT_WORD(nr); + + old = __sync_fetch_and_or(addr, mask); + return !!(old & mask); +} + +static inline int test_and_clear_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + long old; + + addr += BIT_WORD(nr); + + old = __sync_fetch_and_and(addr, ~mask); + return !!(old & mask); +} + #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h index 2f6ea28764a7..ab37a221b41a 100644 --- a/tools/include/asm-generic/bitops/atomic.h +++ b/tools/include/asm-generic/bitops/atomic.h @@ -5,14 +5,11 @@ #include <asm/types.h> #include <asm/bitsperlong.h> -static inline void set_bit(int nr, unsigned long *addr) -{ - addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG); -} - -static inline void clear_bit(int nr, unsigned long *addr) -{ - addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG)); -} +/* + * Just alias the test versions, all of the compiler built-in atomics "fetch", + * and optimizing compile-time constants on x86 isn't worth the complexity. + */ +#define set_bit test_and_set_bit +#define clear_bit test_and_clear_bit #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */ diff --git a/tools/include/linux/bitfield.h b/tools/include/linux/bitfield.h new file mode 100644 index 000000000000..6093fa6db260 --- /dev/null +++ b/tools/include/linux/bitfield.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014 Felix Fietkau <nbd@nbd.name> + * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com> + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include <linux/build_bug.h> +#include <asm/byteorder.h> + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant. + * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_MAX() - produce the maximum value representable by a field + * @_mask: shifted mask defining the field's length and position + * + * FIELD_MAX() returns the maximum value that can be held in the field + * specified by @_mask. + */ +#define FIELD_MAX(_mask) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ + (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ + }) + +/** + * FIELD_FIT() - check if value fits in the field + * @_mask: shifted mask defining the field's length and position + * @_val: value to test against the field + * + * Return: true if @_val can fit inside @_mask, false if @_val is too big. + */ +#define FIELD_FIT(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ + !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. + */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +extern void __compiletime_error("value doesn't fit into mask") +__field_overflow(void); +extern void __compiletime_error("bad bitfield mask") +__bad_mask(void); +static __always_inline u64 field_multiplier(u64 field) +{ + if ((field | (field - 1)) & ((field | (field - 1)) + 1)) + __bad_mask(); + return field & -field; +} +static __always_inline u64 field_mask(u64 field) +{ + return field / field_multiplier(field); +} +#define field_max(field) ((typeof(field))field_mask(field)) +#define ____MAKE_OP(type,base,to,from) \ +static __always_inline __##type type##_encode_bits(base v, base field) \ +{ \ + if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ + __field_overflow(); \ + return to((v & field_mask(field)) * field_multiplier(field)); \ +} \ +static __always_inline __##type type##_replace_bits(__##type old, \ + base val, base field) \ +{ \ + return (old & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline void type##p_replace_bits(__##type *p, \ + base val, base field) \ +{ \ + *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline base type##_get_bits(__##type v, base field) \ +{ \ + return (from(v) & field)/field_multiplier(field); \ +} +#define __MAKE_OP(size) \ + ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ + ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ + ____MAKE_OP(u##size,u##size,,) +____MAKE_OP(u8,u8,,) +__MAKE_OP(16) +__MAKE_OP(32) +__MAKE_OP(64) +#undef __MAKE_OP +#undef ____MAKE_OP + +#endif diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 65d0747c5205..f3566ea0f932 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -78,40 +78,6 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, } /** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - */ -static inline int test_and_set_bit(int nr, unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - - old = *p; - *p = old | mask; - - return (old & mask) != 0; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - */ -static inline int test_and_clear_bit(int nr, unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - - old = *p; - *p = old & ~mask; - - return (old & mask) != 0; -} - -/** * bitmap_zalloc - Allocate bitmap * @nbits: Number of bits */ diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h new file mode 100644 index 000000000000..aaa8a0767aa3 --- /dev/null +++ b/tools/include/linux/interval_tree_generic.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + Interval Trees + (C) 2012 Michel Lespinasse <walken@google.com> + + + include/linux/interval_tree_generic.h +*/ + +#include <linux/rbtree_augmented.h> + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoint of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + * + * Note - before using this, please consider if generic version + * (interval_tree.h) would work for you... + */ + +#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \ + ITSTART, ITLAST, ITSTATIC, ITPREFIX) \ + \ +/* Callbacks for augmented rbtree insert and remove */ \ + \ +RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment, \ + ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST) \ + \ +/* Insert / remove interval nodes from the tree */ \ + \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ + struct rb_root_cached *root) \ +{ \ + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ + ITTYPE start = ITSTART(node), last = ITLAST(node); \ + ITSTRUCT *parent; \ + bool leftmost = true; \ + \ + while (*link) { \ + rb_parent = *link; \ + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \ + if (parent->ITSUBTREE < last) \ + parent->ITSUBTREE = last; \ + if (start < ITSTART(parent)) \ + link = &parent->ITRB.rb_left; \ + else { \ + link = &parent->ITRB.rb_right; \ + leftmost = false; \ + } \ + } \ + \ + node->ITSUBTREE = last; \ + rb_link_node(&node->ITRB, rb_parent, link); \ + rb_insert_augmented_cached(&node->ITRB, root, \ + leftmost, &ITPREFIX ## _augment); \ +} \ + \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ + struct rb_root_cached *root) \ +{ \ + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +/* \ + * Iterate over intervals intersecting [start;last] \ + * \ + * Note that a node's interval intersects [start;last] iff: \ + * Cond1: ITSTART(node) <= last \ + * and \ + * Cond2: start <= ITLAST(node) \ + */ \ + \ +static ITSTRUCT * \ +ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + while (true) { \ + /* \ + * Loop invariant: start <= node->ITSUBTREE \ + * (Cond2 is satisfied by one of the subtree nodes) \ + */ \ + if (node->ITRB.rb_left) { \ + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB); \ + if (start <= left->ITSUBTREE) { \ + /* \ + * Some nodes in left subtree satisfy Cond2. \ + * Iterate to find the leftmost such node N. \ + * If it also satisfies Cond1, that's the \ + * match we are looking for. Otherwise, there \ + * is no matching interval as nodes to the \ + * right of N can't satisfy Cond1 either. \ + */ \ + node = left; \ + continue; \ + } \ + } \ + if (ITSTART(node) <= last) { /* Cond1 */ \ + if (start <= ITLAST(node)) /* Cond2 */ \ + return node; /* node is leftmost match */ \ + if (node->ITRB.rb_right) { \ + node = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB); \ + if (start <= node->ITSUBTREE) \ + continue; \ + } \ + } \ + return NULL; /* No match */ \ + } \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_first(struct rb_root_cached *root, \ + ITTYPE start, ITTYPE last) \ +{ \ + ITSTRUCT *node, *leftmost; \ + \ + if (!root->rb_root.rb_node) \ + return NULL; \ + \ + /* \ + * Fastpath range intersection/overlap between A: [a0, a1] and \ + * B: [b0, b1] is given by: \ + * \ + * a0 <= b1 && b0 <= a1 \ + * \ + * ... where A holds the lock range and B holds the smallest \ + * 'start' and largest 'last' in the tree. For the later, we \ + * rely on the root node, which by augmented interval tree \ + * property, holds the largest value in its last-in-subtree. \ + * This allows mitigating some of the tree walk overhead for \ + * for non-intersecting ranges, maintained and consulted in O(1). \ + */ \ + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ + if (node->ITSUBTREE < start) \ + return NULL; \ + \ + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ + if (ITSTART(leftmost) > last) \ + return NULL; \ + \ + return ITPREFIX ## _subtree_search(node, start, last); \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + struct rb_node *rb = node->ITRB.rb_right, *prev; \ + \ + while (true) { \ + /* \ + * Loop invariants: \ + * Cond1: ITSTART(node) <= last \ + * rb == node->ITRB.rb_right \ + * \ + * First, search right subtree if suitable \ + */ \ + if (rb) { \ + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \ + if (start <= right->ITSUBTREE) \ + return ITPREFIX ## _subtree_search(right, \ + start, last); \ + } \ + \ + /* Move up the tree until we come from a node's left child */ \ + do { \ + rb = rb_parent(&node->ITRB); \ + if (!rb) \ + return NULL; \ + prev = &node->ITRB; \ + node = rb_entry(rb, ITSTRUCT, ITRB); \ + rb = node->ITRB.rb_right; \ + } while (prev == rb); \ + \ + /* Check if the node intersects [start;last] */ \ + if (last < ITSTART(node)) /* !Cond1 */ \ + return NULL; \ + else if (start <= ITLAST(node)) /* Cond2 */ \ + return node; \ + } \ +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 51b9aa640ad2..464ca3f01fe7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -922,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -935,6 +942,7 @@ enum bpf_map_type { BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -2576,14 +2584,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -2639,7 +2652,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -2800,12 +2813,10 @@ union bpf_attr { * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * @@ -5282,7 +5293,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5292,7 +5303,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5302,7 +5313,7 @@ union bpf_attr { * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr or if *flags* is not 0. * - * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description * Get a pointer to the underlying dynptr data. * @@ -5403,7 +5414,7 @@ union bpf_attr { * Drain samples from the specified user ring buffer, and invoke * the provided callback for each such sample: * - * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx); * * If **callback_fn** returns 0, the helper will continue to try * and drain the next sample, up to a maximum of @@ -5435,226 +5446,272 @@ union bpf_attr { * **-E2BIG** if user-space has tried to publish a sample which is * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), \ - FN(skc_to_tcp6_sock), \ - FN(skc_to_tcp_sock), \ - FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), \ - FN(get_task_stack), \ - FN(load_hdr_opt), \ - FN(store_hdr_opt), \ - FN(reserve_hdr_opt), \ - FN(inode_storage_get), \ - FN(inode_storage_delete), \ - FN(d_path), \ - FN(copy_from_user), \ - FN(snprintf_btf), \ - FN(seq_printf_btf), \ - FN(skb_cgroup_classid), \ - FN(redirect_neigh), \ - FN(per_cpu_ptr), \ - FN(this_cpu_ptr), \ - FN(redirect_peer), \ - FN(task_storage_get), \ - FN(task_storage_delete), \ - FN(get_current_task_btf), \ - FN(bprm_opts_set), \ - FN(ktime_get_coarse_ns), \ - FN(ima_inode_hash), \ - FN(sock_from_file), \ - FN(check_mtu), \ - FN(for_each_map_elem), \ - FN(snprintf), \ - FN(sys_bpf), \ - FN(btf_find_by_name_kind), \ - FN(sys_close), \ - FN(timer_init), \ - FN(timer_set_callback), \ - FN(timer_start), \ - FN(timer_cancel), \ - FN(get_func_ip), \ - FN(get_attach_cookie), \ - FN(task_pt_regs), \ - FN(get_branch_snapshot), \ - FN(trace_vprintk), \ - FN(skc_to_unix_sock), \ - FN(kallsyms_lookup_name), \ - FN(find_vma), \ - FN(loop), \ - FN(strncmp), \ - FN(get_func_arg), \ - FN(get_func_ret), \ - FN(get_func_arg_cnt), \ - FN(get_retval), \ - FN(set_retval), \ - FN(xdp_get_buff_len), \ - FN(xdp_load_bytes), \ - FN(xdp_store_bytes), \ - FN(copy_from_user_task), \ - FN(skb_set_tstamp), \ - FN(ima_file_hash), \ - FN(kptr_xchg), \ - FN(map_lookup_percpu_elem), \ - FN(skc_to_mptcp_sock), \ - FN(dynptr_from_mem), \ - FN(ringbuf_reserve_dynptr), \ - FN(ringbuf_submit_dynptr), \ - FN(ringbuf_discard_dynptr), \ - FN(dynptr_read), \ - FN(dynptr_write), \ - FN(dynptr_data), \ - FN(tcp_raw_gen_syncookie_ipv4), \ - FN(tcp_raw_gen_syncookie_ipv6), \ - FN(tcp_raw_check_syncookie_ipv4), \ - FN(tcp_raw_check_syncookie_ipv6), \ - FN(ktime_get_tai_ns), \ - FN(user_ringbuf_drain), \ +#define ___BPF_FUNC_MAPPER(FN, ctx...) \ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, ##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, ##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, ##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, ##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, ##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, ##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, ##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument + */ +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN @@ -6391,6 +6448,7 @@ struct bpf_sock_ops { * the outgoing header has not * been written yet. */ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -6833,6 +6891,16 @@ struct bpf_dynptr { __u64 :64; } __attribute__((aligned(8))); +struct bpf_list_head { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 0242f31e339c..901d98b865a1 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -673,6 +673,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 0d5d4419139a..21d6d29502e4 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -86,14 +86,6 @@ struct kvm_debug_guest { /* *** End of deprecated interfaces *** */ -/* for KVM_CREATE_MEMORY_REGION */ -struct kvm_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ -}; - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -104,9 +96,9 @@ struct kvm_userspace_memory_region { }; /* - * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, - * other bits are reserved for kvm internal use which are defined in - * include/linux/kvm_host.h. + * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for + * userspace, other bits are reserved for kvm internal use which are defined + *in include/linux/kvm_host.h. */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) @@ -1438,17 +1430,11 @@ struct kvm_vfio_spapr_tce { }; /* - * ioctls for VM fds - */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) -/* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -/* KVM_SET_MEMORY_ALIAS is obsolete: */ -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 7f5f7d2ebe1f..cf7f02c67968 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -287,3 +287,20 @@ tags: # Delete partially updated (corrupted) files on error .DELETE_ON_ERROR: + +help: + @echo 'libbpf common targets:' + @echo ' HINT: use "V=1" to enable verbose build' + @echo ' all - build libraries and pkgconfig' + @echo ' clean - remove all generated files' + @echo ' check - check abi and version info' + @echo '' + @echo 'libbpf install targets:' + @echo ' HINT: use "prefix"(defaults to "/usr/local") or "DESTDIR" (defaults to "/")' + @echo ' to adjust target desitantion, e.g. "make prefix=/usr/local install"' + @echo ' install - build and install all headers, libraries and pkgconfig' + @echo ' install_headers - install only headers to include/bpf' + @echo '' + @echo 'libbpf make targets:' + @echo ' tags - use ctags to make tag information for source code browsing' + @echo ' cscope - use cscope to make interactive source code browsing database' diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1d49a0352836..9aff98f42a3d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -935,58 +935,98 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID); } -int bpf_prog_get_fd_by_id(__u32 id) +int bpf_prog_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.prog_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_map_get_fd_by_id(__u32 id) +int bpf_prog_get_fd_by_id(__u32 id) +{ + return bpf_prog_get_fd_by_id_opts(id, NULL); +} + +int bpf_map_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.map_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_btf_get_fd_by_id(__u32 id) +int bpf_map_get_fd_by_id(__u32 id) +{ + return bpf_map_get_fd_by_id_opts(id, NULL); +} + +int bpf_btf_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.btf_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_link_get_fd_by_id(__u32 id) +int bpf_btf_get_fd_by_id(__u32 id) +{ + return bpf_btf_get_fd_by_id_opts(id, NULL); +} + +int bpf_link_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.link_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } +int bpf_link_get_fd_by_id(__u32 id) +{ + return bpf_link_get_fd_by_id_opts(id, NULL); +} + int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) { const size_t attr_sz = offsetofend(union bpf_attr, info); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 9c50beabdd14..7468978d3c27 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -365,10 +365,26 @@ LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id); + +struct bpf_get_fd_by_id_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 open_flags; /* permissions requested for the operation on fd */ + size_t :0; +}; +#define bpf_get_fd_by_id_opts__last_field open_flags + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_btf_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); struct bpf_prog_query_opts { @@ -393,8 +409,15 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); +#ifdef __cplusplus +/* forward-declaring enums in C++ isn't compatible with pure C enums, so + * instead define bpf_enable_stats() as accepting int as an input + */ +LIBBPF_API int bpf_enable_stats(int type); +#else enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); +#endif struct bpf_prog_bind_opts { size_t sz; /* size of this struct for forward/backward compatibility */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d88647da2c7f..71e165b09ed5 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1559,15 +1559,15 @@ struct btf_pipe { static int btf_rewrite_str(__u32 *str_off, void *ctx) { struct btf_pipe *p = ctx; - void *mapped_off; + long mapped_off; int off, err; if (!*str_off) /* nothing to do for empty strings */ return 0; if (p->str_off_map && - hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) { - *str_off = (__u32)(long)mapped_off; + hashmap__find(p->str_off_map, *str_off, &mapped_off)) { + *str_off = mapped_off; return 0; } @@ -1579,7 +1579,7 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx) * performing expensive string comparisons. */ if (p->str_off_map) { - err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off); + err = hashmap__append(p->str_off_map, *str_off, off); if (err) return err; } @@ -1630,8 +1630,8 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) return 0; } -static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx); -static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx); +static size_t btf_dedup_identity_hash_fn(long key, void *ctx); +static bool btf_dedup_equal_fn(long k1, long k2, void *ctx); int btf__add_btf(struct btf *btf, const struct btf *src_btf) { @@ -1724,7 +1724,8 @@ err_out: memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); /* and now restore original strings section size; types data size - * wasn't modified, so doesn't need restoring, see big comment above */ + * wasn't modified, so doesn't need restoring, see big comment above + */ btf->hdr->str_len = old_strs_len; hashmap__free(p.str_off_map); @@ -2329,7 +2330,7 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) */ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) { - if (!value|| !value[0]) + if (!value || !value[0]) return libbpf_err(-EINVAL); return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); @@ -2881,6 +2882,7 @@ static int btf_dedup_strings(struct btf_dedup *d); static int btf_dedup_prim_types(struct btf_dedup *d); static int btf_dedup_struct_types(struct btf_dedup *d); static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_resolve_fwds(struct btf_dedup *d); static int btf_dedup_compact_types(struct btf_dedup *d); static int btf_dedup_remap_types(struct btf_dedup *d); @@ -2988,15 +2990,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * Algorithm summary * ================= * - * Algorithm completes its work in 6 separate passes: + * Algorithm completes its work in 7 separate passes: * * 1. Strings deduplication. * 2. Primitive types deduplication (int, enum, fwd). * 3. Struct/union types deduplication. - * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * 4. Resolve unambiguous forward declarations. + * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func * protos, and const/volatile/restrict modifiers). - * 5. Types compaction. - * 6. Types remapping. + * 6. Types compaction. + * 7. Types remapping. * * Algorithm determines canonical type descriptor, which is a single * representative type for each truly unique type. This canonical type is the @@ -3060,6 +3063,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) pr_debug("btf_dedup_struct_types failed:%d\n", err); goto done; } + err = btf_dedup_resolve_fwds(d); + if (err < 0) { + pr_debug("btf_dedup_resolve_fwds failed:%d\n", err); + goto done; + } err = btf_dedup_ref_types(d); if (err < 0) { pr_debug("btf_dedup_ref_types failed:%d\n", err); @@ -3126,12 +3134,11 @@ static long hash_combine(long h, long value) } #define for_each_dedup_cand(d, node, hash) \ - hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) + hashmap__for_each_key_entry(d->dedup_table, node, hash) static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id) { - return hashmap__append(d->dedup_table, - (void *)hash, (void *)(long)type_id); + return hashmap__append(d->dedup_table, hash, type_id); } static int btf_dedup_hypot_map_add(struct btf_dedup *d, @@ -3178,17 +3185,17 @@ static void btf_dedup_free(struct btf_dedup *d) free(d); } -static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx) +static size_t btf_dedup_identity_hash_fn(long key, void *ctx) { - return (size_t)key; + return key; } -static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx) +static size_t btf_dedup_collision_hash_fn(long key, void *ctx) { return 0; } -static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) +static bool btf_dedup_equal_fn(long k1, long k2, void *ctx) { return k1 == k2; } @@ -3404,23 +3411,17 @@ static long btf_hash_enum(struct btf_type *t) { long h; - /* don't hash vlen and enum members to support enum fwd resolving */ + /* don't hash vlen, enum members and size to support enum fwd resolving */ h = hash_combine(0, t->name_off); - h = hash_combine(h, t->info & ~0xffff); - h = hash_combine(h, t->size); return h; } -/* Check structural equality of two ENUMs. */ -static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2) { const struct btf_enum *m1, *m2; __u16 vlen; int i; - if (!btf_equal_common(t1, t2)) - return false; - vlen = btf_vlen(t1); m1 = btf_enum(t1); m2 = btf_enum(t2); @@ -3433,15 +3434,12 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } -static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2) +static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2) { const struct btf_enum64 *m1, *m2; __u16 vlen; int i; - if (!btf_equal_common(t1, t2)) - return false; - vlen = btf_vlen(t1); m1 = btf_enum64(t1); m2 = btf_enum64(t2); @@ -3455,6 +3453,19 @@ static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2) return true; } +/* Check structural equality of two ENUMs or ENUM64s. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_equal_common(t1, t2)) + return false; + + /* t1 & t2 kinds are identical because of btf_equal_common */ + if (btf_kind(t1) == BTF_KIND_ENUM) + return btf_equal_enum_members(t1, t2); + else + return btf_equal_enum64_members(t1, t2); +} + static inline bool btf_is_enum_fwd(struct btf_type *t) { return btf_is_any_enum(t) && btf_vlen(t) == 0; @@ -3464,21 +3475,14 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) { if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) return btf_equal_enum(t1, t2); - /* ignore vlen when comparing */ - return t1->name_off == t2->name_off && - (t1->info & ~0xffff) == (t2->info & ~0xffff) && - t1->size == t2->size; -} - -static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2) -{ - if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) - return btf_equal_enum64(t1, t2); - - /* ignore vlen when comparing */ + /* At this point either t1 or t2 or both are forward declarations, thus: + * - skip comparing vlen because it is zero for forward declarations; + * - skip comparing size to allow enum forward declarations + * to be compatible with enum64 full declarations; + * - skip comparing kind for the same reason. + */ return t1->name_off == t2->name_off && - (t1->info & ~0xffff) == (t2->info & ~0xffff) && - t1->size == t2->size; + btf_is_any_enum(t1) && btf_is_any_enum(t2); } /* @@ -3753,7 +3757,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_INT: h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_int_tag(t, cand)) { new_id = cand_id; @@ -3763,9 +3767,10 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: h = btf_hash_enum(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_enum(t, cand)) { new_id = cand_id; @@ -3783,32 +3788,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) } break; - case BTF_KIND_ENUM64: - h = btf_hash_enum(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = btf_type_by_id(d->btf, cand_id); - if (btf_equal_enum64(t, cand)) { - new_id = cand_id; - break; - } - if (btf_compat_enum64(t, cand)) { - if (btf_is_enum_fwd(t)) { - /* resolve fwd to full enum */ - new_id = cand_id; - break; - } - /* resolve canonical enum fwd to full enum */ - d->map[cand_id] = type_id; - } - } - break; - case BTF_KIND_FWD: case BTF_KIND_FLOAT: h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; @@ -3887,14 +3871,14 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) } /* Check if given two types are identical ARRAY definitions */ -static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) { struct btf_type *t1, *t2; t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); if (!btf_is_array(t1) || !btf_is_array(t2)) - return 0; + return false; return btf_equal_array(t1, t2); } @@ -3918,7 +3902,9 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id m1 = btf_members(t1); m2 = btf_members(t2); for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { - if (m1->type != m2->type) + if (m1->type != m2->type && + !btf_dedup_identical_arrays(d, m1->type, m2->type) && + !btf_dedup_identical_structs(d, m1->type, m2->type)) return false; } return true; @@ -4097,10 +4083,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return btf_equal_int_tag(cand_type, canon_type); case BTF_KIND_ENUM: - return btf_compat_enum(cand_type, canon_type); - case BTF_KIND_ENUM64: - return btf_compat_enum64(cand_type, canon_type); + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: case BTF_KIND_FLOAT: @@ -4311,7 +4295,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_struct(t); for_each_dedup_cand(d, hash_entry, h) { - __u32 cand_id = (__u32)(long)hash_entry->value; + __u32 cand_id = hash_entry->value; int eq; /* @@ -4416,7 +4400,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; @@ -4433,7 +4417,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_int_tag(t, cand)) { new_id = cand_id; @@ -4457,7 +4441,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_array(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_array(t, cand)) { new_id = cand_id; @@ -4489,7 +4473,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_fnproto(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_fnproto(t, cand)) { new_id = cand_id; @@ -4526,6 +4510,134 @@ static int btf_dedup_ref_types(struct btf_dedup *d) } /* + * Collect a map from type names to type ids for all canonical structs + * and unions. If the same name is shared by several canonical types + * use a special value 0 to indicate this fact. + */ +static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map) +{ + __u32 nr_types = btf__type_cnt(d->btf); + struct btf_type *t; + __u32 type_id; + __u16 kind; + int err; + + /* + * Iterate over base and split module ids in order to get all + * available structs in the map. + */ + for (type_id = 1; type_id < nr_types; ++type_id) { + t = btf_type_by_id(d->btf, type_id); + kind = btf_kind(t); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + continue; + + /* Skip non-canonical types */ + if (type_id != d->map[type_id]) + continue; + + err = hashmap__add(names_map, t->name_off, type_id); + if (err == -EEXIST) + err = hashmap__set(names_map, t->name_off, 0, NULL, NULL); + + if (err) + return err; + } + + return 0; +} + +static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id) +{ + struct btf_type *t = btf_type_by_id(d->btf, type_id); + enum btf_fwd_kind fwd_kind = btf_kflag(t); + __u16 cand_kind, kind = btf_kind(t); + struct btf_type *cand_t; + uintptr_t cand_id; + + if (kind != BTF_KIND_FWD) + return 0; + + /* Skip if this FWD already has a mapping */ + if (type_id != d->map[type_id]) + return 0; + + if (!hashmap__find(names_map, t->name_off, &cand_id)) + return 0; + + /* Zero is a special value indicating that name is not unique */ + if (!cand_id) + return 0; + + cand_t = btf_type_by_id(d->btf, cand_id); + cand_kind = btf_kind(cand_t); + if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) || + (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION)) + return 0; + + d->map[type_id] = cand_id; + + return 0; +} + +/* + * Resolve unambiguous forward declarations. + * + * The lion's share of all FWD declarations is resolved during + * `btf_dedup_struct_types` phase when different type graphs are + * compared against each other. However, if in some compilation unit a + * FWD declaration is not a part of a type graph compared against + * another type graph that declaration's canonical type would not be + * changed. Example: + * + * CU #1: + * + * struct foo; + * struct foo *some_global; + * + * CU #2: + * + * struct foo { int u; }; + * struct foo *another_global; + * + * After `btf_dedup_struct_types` the BTF looks as follows: + * + * [1] STRUCT 'foo' size=4 vlen=1 ... + * [2] INT 'int' size=4 ... + * [3] PTR '(anon)' type_id=1 + * [4] FWD 'foo' fwd_kind=struct + * [5] PTR '(anon)' type_id=4 + * + * This pass assumes that such FWD declarations should be mapped to + * structs or unions with identical name in case if the name is not + * ambiguous. + */ +static int btf_dedup_resolve_fwds(struct btf_dedup *d) +{ + int i, err; + struct hashmap *names_map; + + names_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(names_map)) + return PTR_ERR(names_map); + + err = btf_dedup_fill_unique_names_map(d, names_map); + if (err < 0) + goto exit; + + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i); + if (err < 0) + break; + } + +exit: + hashmap__free(names_map); + return err; +} + +/* * Compact types. * * After we established for each type its corresponding canonical representative diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 4221f73a74d0..deb2bc9a0a7b 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -117,14 +117,14 @@ struct btf_dump { struct btf_dump_data *typed_dump; }; -static size_t str_hash_fn(const void *key, void *ctx) +static size_t str_hash_fn(long key, void *ctx) { - return str_hash(key); + return str_hash((void *)key); } -static bool str_equal_fn(const void *a, const void *b, void *ctx) +static bool str_equal_fn(long a, long b, void *ctx) { - return strcmp(a, b) == 0; + return strcmp((void *)a, (void *)b) == 0; } static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) @@ -219,6 +219,17 @@ static int btf_dump_resize(struct btf_dump *d) return 0; } +static void btf_dump_free_names(struct hashmap *map) +{ + size_t bkt; + struct hashmap_entry *cur; + + hashmap__for_each_entry(map, cur, bkt) + free((void *)cur->pkey); + + hashmap__free(map); +} + void btf_dump__free(struct btf_dump *d) { int i; @@ -237,8 +248,8 @@ void btf_dump__free(struct btf_dump *d) free(d->cached_names); free(d->emit_queue); free(d->decl_stack); - hashmap__free(d->type_names); - hashmap__free(d->ident_names); + btf_dump_free_names(d->type_names); + btf_dump_free_names(d->ident_names); free(d); } @@ -944,7 +955,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, lvl + 1); } - if (vlen) + /* + * Keep `struct empty {}` on a single line, + * only print newline when there are regular or padding fields. + */ + if (vlen || t->size) btf_dump_printf(d, "\n"); btf_dump_printf(d, "%s}", pfx(lvl)); if (packed) @@ -1520,11 +1535,22 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, const char *orig_name) { + char *old_name, *new_name; size_t dup_cnt = 0; + int err; + + new_name = strdup(orig_name); + if (!new_name) + return 1; - hashmap__find(name_map, orig_name, (void **)&dup_cnt); + (void)hashmap__find(name_map, orig_name, &dup_cnt); dup_cnt++; - hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); + + err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL); + if (err) + free(new_name); + + free(old_name); return dup_cnt; } @@ -1963,7 +1989,7 @@ static int btf_dump_struct_data(struct btf_dump *d, { const struct btf_member *m = btf_members(t); __u16 n = btf_vlen(t); - int i, err; + int i, err = 0; /* note that we increment depth before calling btf_dump_print() below; * this is intentional. btf_dump_data_newline() will not print a diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index aeb09c288716..140ee4055676 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map) } static bool hashmap_find_entry(const struct hashmap *map, - const void *key, size_t hash, + const long key, size_t hash, struct hashmap_entry ***pprev, struct hashmap_entry **entry) { @@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map, return false; } -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value) +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value) { struct hashmap_entry *entry; size_t h; int err; if (old_key) - *old_key = NULL; + *old_key = 0; if (old_value) - *old_value = NULL; + *old_value = 0; h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); if (strategy != HASHMAP_APPEND && @@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value, return 0; } -bool hashmap__find(const struct hashmap *map, const void *key, void **value) +bool hashmap_find(const struct hashmap *map, long key, long *value) { struct hashmap_entry *entry; size_t h; @@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value) return true; } -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value) +bool hashmap_delete(struct hashmap *map, long key, + long *old_key, long *old_value) { struct hashmap_entry **pprev, *entry; size_t h; diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index 10a4c4cd13cf..0a5bf1937a7c 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s) return h; } -typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); -typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); +typedef size_t (*hashmap_hash_fn)(long key, void *ctx); +typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx); +/* + * Hashmap interface is polymorphic, keys and values could be either + * long-sized integers or pointers, this is achieved as follows: + * - interface functions that operate on keys and values are hidden + * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert; + * - these auxiliary macros cast the key and value parameters as + * long or long *, so the user does not have to specify the casts explicitly; + * - for pointer parameters (e.g. old_key) the size of the pointed + * type is verified by hashmap_cast_ptr using _Static_assert; + * - when iterating using hashmap__for_each_* forms + * hasmap_entry->key should be used for integer keys and + * hasmap_entry->pkey should be used for pointer keys, + * same goes for values. + */ struct hashmap_entry { - const void *key; - void *value; + union { + long key; + const void *pkey; + }; + union { + long value; + void *pvalue; + }; struct hashmap_entry *next; }; @@ -102,6 +122,13 @@ enum hashmap_insert_strategy { HASHMAP_APPEND, }; +#define hashmap_cast_ptr(p) ({ \ + _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \ + sizeof(*(p)) == sizeof(long), \ + #p " pointee should be a long-sized integer or a pointer"); \ + (long *)(p); \ +}) + /* * hashmap__insert() adds key/value entry w/ various semantics, depending on * provided strategy value. If a given key/value pair replaced already @@ -109,42 +136,38 @@ enum hashmap_insert_strategy { * through old_key and old_value to allow calling code do proper memory * management. */ -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value); +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value); -static inline int hashmap__add(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); -} +#define hashmap__insert(map, key, value, strategy, old_key, old_value) \ + hashmap_insert((map), (long)(key), (long)(value), (strategy), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -static inline int hashmap__set(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_SET, - old_key, old_value); -} +#define hashmap__add(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL) -static inline int hashmap__update(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_UPDATE, - old_key, old_value); -} +#define hashmap__set(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value)) -static inline int hashmap__append(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); -} +#define hashmap__update(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value)) + +#define hashmap__append(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL) + +bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value); + +#define hashmap__delete(map, key, old_key, old_value) \ + hashmap_delete((map), (long)(key), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value); +bool hashmap_find(const struct hashmap *map, long key, long *value); -bool hashmap__find(const struct hashmap *map, const void *key, void **value); +#define hashmap__find(map, key, value) \ + hashmap_find((map), (long)(key), hashmap_cast_ptr(value)) /* * hashmap__for_each_entry - iterate over all entries in hashmap diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 184ce1684dcd..2a82f49ce16f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -164,6 +164,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", + [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", }; static const char * const prog_type_name[] = { @@ -346,7 +347,8 @@ enum sec_def_flags { SEC_ATTACHABLE = 2, SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, /* attachment target is specified through BTF ID in either kernel or - * other BPF program's BTF object */ + * other BPF program's BTF object + */ SEC_ATTACH_BTF = 4, /* BPF program type allows sleeping/blocking in kernel */ SEC_SLEEPABLE = 8, @@ -487,7 +489,7 @@ struct bpf_map { char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section - * name. This is important to be be able to find corresponding BTF + * name. This is important to be able to find corresponding BTF * DATASEC information. */ char *real_name; @@ -597,7 +599,7 @@ struct elf_state { size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; - int sec_cnt; + size_t sec_cnt; int btf_maps_shndx; __u32 btf_maps_sec_btf_id; int text_shndx; @@ -1408,6 +1410,10 @@ static int bpf_object__check_endianness(struct bpf_object *obj) static int bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) { + if (!data) { + pr_warn("invalid license section in %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't * go over allowed ELF data section buffer */ @@ -1421,7 +1427,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) { __u32 kver; - if (size != sizeof(kver)) { + if (!data || size != sizeof(kver)) { pr_warn("invalid kver section in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -1457,15 +1463,12 @@ static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 return -ENOENT; } -static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) +static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) { Elf_Data *symbols = obj->efile.symbols; const char *sname; size_t si; - if (!name || !off) - return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); @@ -1479,15 +1482,13 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ sname = elf_sym_str(obj, sym->st_name); if (!sname) { pr_warn("failed to get sym name string for var %s\n", name); - return -EIO; - } - if (strcmp(name, sname) == 0) { - *off = sym->st_value; - return 0; + return ERR_PTR(-EIO); } + if (strcmp(name, sname) == 0) + return sym; } - return -ENOENT; + return ERR_PTR(-ENOENT); } static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) @@ -1578,7 +1579,38 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) } static int -bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map); +map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map); + +/* Internal BPF map is mmap()'able only if at least one of corresponding + * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL + * variable and it's not marked as __hidden (which turns it into, effectively, + * a STATIC variable). + */ +static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) +{ + const struct btf_type *t, *vt; + struct btf_var_secinfo *vsi; + int i, n; + + if (!map->btf_value_type_id) + return false; + + t = btf__type_by_id(obj->btf, map->btf_value_type_id); + if (!btf_is_datasec(t)) + return false; + + vsi = btf_var_secinfos(t); + for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { + vt = btf__type_by_id(obj->btf, vsi->type); + if (!btf_is_var(vt)) + continue; + + if (btf_var(vt)->linkage != BTF_VAR_STATIC) + return true; + } + + return false; +} static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, @@ -1610,7 +1642,12 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG ? BPF_F_RDONLY_PROG : 0; - def->map_flags |= BPF_F_MMAPABLE; + + /* failures are fine because of maps like .rodata.str1.1 */ + (void) map_fill_btf_type_info(obj, map); + + if (map_is_mmapable(obj, map)) + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); @@ -1627,9 +1664,6 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, return err; } - /* failures are fine because of maps like .rodata.str1.1 */ - (void) bpf_map_find_btf_info(obj, map); - if (data) memcpy(map->mmaped, data, data_sz); @@ -1830,12 +1864,20 @@ static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, return -ERANGE; } switch (ext->kcfg.sz) { - case 1: *(__u8 *)ext_val = value; break; - case 2: *(__u16 *)ext_val = value; break; - case 4: *(__u32 *)ext_val = value; break; - case 8: *(__u64 *)ext_val = value; break; - default: - return -EINVAL; + case 1: + *(__u8 *)ext_val = value; + break; + case 2: + *(__u16 *)ext_val = value; + break; + case 4: + *(__u32 *)ext_val = value; + break; + case 8: + *(__u64 *)ext_val = value; + break; + default: + return -EINVAL; } ext->is_set = true; return 0; @@ -2541,7 +2583,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, fill_map_from_def(map->inner_map, &inner_def); } - err = bpf_map_find_btf_info(obj, map); + err = map_fill_btf_type_info(obj, map); if (err) return err; @@ -2737,7 +2779,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) m->type = enum64_placeholder_id; m->offset = 0; } - } + } } return 0; @@ -2846,57 +2888,89 @@ static int compare_vsi_off(const void *_a, const void *_b) static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, struct btf_type *t) { - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; + __u32 size = 0, i, vars = btf_vlen(t); + const char *sec_name = btf__name_by_offset(btf, t->name_off); struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; + bool fixup_offsets = false; + int err; - if (!name) { + if (!sec_name) { pr_debug("No name found in string section for DATASEC kind.\n"); return -ENOENT; } - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables + /* Extern-backing datasecs (.ksyms, .kconfig) have their size and + * variable offsets set at the previous step. Further, not every + * extern BTF VAR has corresponding ELF symbol preserved, so we skip + * all fixups altogether for such sections and go straight to sorting + * VARs within their DATASEC. */ - if (t->size) + if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) goto sort_vars; - ret = find_elf_sec_sz(obj, name, &size); - if (ret || !size) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } + /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to + * fix this up. But BPF static linker already fixes this up and fills + * all the sizes and offsets during static linking. So this step has + * to be optional. But the STV_HIDDEN handling is non-optional for any + * non-extern DATASEC, so the variable fixup loop below handles both + * functions at the same time, paying the cost of BTF VAR <-> ELF + * symbol matching just once. + */ + if (t->size == 0) { + err = find_elf_sec_sz(obj, sec_name, &size); + if (err || !size) { + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", + sec_name, size, err); + return -ENOENT; + } - t->size = size; + t->size = size; + fixup_offsets = true; + } for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + const struct btf_type *t_var; + struct btf_var *var; + const char *var_name; + Elf64_Sym *sym; + t_var = btf__type_by_id(btf, vsi->type); if (!t_var || !btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); + pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); return -EINVAL; } var = btf_var(t_var); - if (var->linkage == BTF_VAR_STATIC) + if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) continue; - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); + var_name = btf__name_by_offset(btf, t_var->name_off); + if (!var_name) { + pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", + sec_name, i); return -ENOENT; } - ret = find_elf_var_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); + sym = find_elf_var_sym(obj, var_name); + if (IS_ERR(sym)) { + pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", + sec_name, var_name); return -ENOENT; } - vsi->offset = off; + if (fixup_offsets) + vsi->offset = sym->st_value; + + /* if variable is a global/weak symbol, but has restricted + * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR + * as static. This follows similar logic for functions (BPF + * subprogs) and influences libbpf's further decisions about + * whether to make global data BPF array maps as + * BPF_F_MMAPABLE. + */ + if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) + var->linkage = BTF_VAR_STATIC; } sort_vars: @@ -2904,13 +2978,16 @@ sort_vars: return 0; } -static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +static int bpf_object_fixup_btf(struct bpf_object *obj) { - int err = 0; - __u32 i, n = btf__type_cnt(btf); + int i, n, err = 0; + if (!obj->btf) + return 0; + + n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { - struct btf_type *t = btf_type_by_id(btf, i); + struct btf_type *t = btf_type_by_id(obj->btf, i); /* Loader needs to fix up some of the things compiler * couldn't get its hands on while emitting BTF. This @@ -2918,28 +2995,12 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) * the info from the ELF itself for this purpose. */ if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); + err = btf_fixup_datasec(obj, obj->btf, t); if (err) - break; + return err; } } - return libbpf_err(err); -} - -static int bpf_object__finalize_btf(struct bpf_object *obj) -{ - int err; - - if (!obj->btf) - return 0; - - err = btf_finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - return err; - } - return 0; } @@ -3312,10 +3373,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf64_Shdr *sh; /* ELF section indices are 0-based, but sec #0 is special "invalid" - * section. e_shnum does include sec #0, so e_shnum is the necessary - * size of an array to keep all the sections. + * section. Since section count retrieved by elf_getshdrnum() does + * include sec #0, it is already the necessary size of an array to keep + * all the sections. */ - obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; + if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { + pr_warn("elf: failed to get the number of sections for %s: %s\n", + obj->path, elf_errmsg(-1)); + return -LIBBPF_ERRNO__FORMAT; + } obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); if (!obj->efile.secs) return -ENOMEM; @@ -3445,7 +3511,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RELO; sec_desc->shdr = sh; sec_desc->data = data; - } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || + str_has_pfx(name, BSS_SEC "."))) { sec_desc->sec_type = SEC_BSS; sec_desc->shdr = sh; sec_desc->data = data; @@ -3461,7 +3528,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } /* sort BPF programs by section name and in-section instruction offset - * for faster search */ + * for faster search + */ if (obj->nr_programs) qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); @@ -3760,7 +3828,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -EINVAL; } ext->kcfg.type = find_kcfg_type(obj->btf, t->type, - &ext->kcfg.is_signed); + &ext->kcfg.is_signed); if (ext->kcfg.type == KCFG_UNKNOWN) { pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); return -ENOTSUP; @@ -4106,6 +4174,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, int l = 0, r = obj->nr_programs - 1, m; struct bpf_program *prog; + if (!obj->nr_programs) + return NULL; + while (l < r) { m = l + (r - l + 1) / 2; prog = &obj->programs[m]; @@ -4223,7 +4294,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat return 0; } -static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) +static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) { int id; @@ -4905,9 +4976,9 @@ bpf_object__reuse_map(struct bpf_map *map) err = bpf_map__reuse_fd(map, pin_fd); close(pin_fd); - if (err) { + if (err) return err; - } + map->pinned = true; pr_debug("reused pinned map at '%s'\n", map->pin_path); @@ -5425,7 +5496,7 @@ static int load_module_btfs(struct bpf_object *obj) } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, - sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); + sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) goto err_out; @@ -5541,21 +5612,16 @@ int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); } -static size_t bpf_core_hash_fn(const void *key, void *ctx) +static size_t bpf_core_hash_fn(const long key, void *ctx) { - return (size_t)key; + return key; } -static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) +static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) { return k1 == k2; } -static void *u32_as_hash_key(__u32 x) -{ - return (void *)(uintptr_t)x; -} - static int record_relo_core(struct bpf_program *prog, const struct bpf_core_relo *core_relo, int insn_idx) { @@ -5598,7 +5664,6 @@ static int bpf_core_resolve_relo(struct bpf_program *prog, struct bpf_core_relo_res *targ_res) { struct bpf_core_spec specs_scratch[3] = {}; - const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_cand_list *cands = NULL; const char *prog_name = prog->name; const struct btf_type *local_type; @@ -5615,7 +5680,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog, return -EINVAL; if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && - !hashmap__find(cand_cache, type_key, (void **)&cands)) { + !hashmap__find(cand_cache, local_id, &cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", @@ -5623,7 +5688,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog, local_name, PTR_ERR(cands)); return PTR_ERR(cands); } - err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); + err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); if (err) { bpf_core_free_cands(cands); return err; @@ -5746,7 +5811,7 @@ out: if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { - bpf_core_free_cands(entry->value); + bpf_core_free_cands(entry->pvalue); } hashmap__free(cand_cache); } @@ -6183,7 +6248,8 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * prog; each main prog can have a different set of * subprograms appended (potentially in different order as * well), so position of any subprog can be different for - * different main programs */ + * different main programs + */ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", @@ -7221,7 +7287,7 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); - err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object_fixup_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); err = err ? : bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); @@ -10941,7 +11007,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, - usdt_provider, usdt_name, usdt_cookie); + usdt_provider, usdt_name, usdt_cookie); err = libbpf_get_error(link); if (err) return libbpf_err_ptr(err); @@ -11169,7 +11235,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } *link = bpf_program__attach_raw_tracepoint(prog, tp_name); - return libbpf_get_error(link); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ @@ -12250,7 +12316,7 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) btf = bpf_object__btf(s->obj); if (!btf) { pr_warn("subskeletons require BTF at runtime (object %s)\n", - bpf_object__name(s->obj)); + bpf_object__name(s->obj)); return libbpf_err(-errno); } diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c1d6aa7c82b6..71bf5691a689 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -367,10 +367,14 @@ LIBBPF_1.0.0 { libbpf_bpf_map_type_str; libbpf_bpf_prog_type_str; perf_buffer__buffer; -}; +} LIBBPF_0.8.0; LIBBPF_1.1.0 { global: + bpf_btf_get_fd_by_id_opts; + bpf_link_get_fd_by_id_opts; + bpf_map_get_fd_by_id_opts; + bpf_prog_get_fd_by_id_opts; user_ring_buffer__discard; user_ring_buffer__free; user_ring_buffer__new; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index f3a8e8e74eb8..b44fcbb4b42e 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -221,6 +221,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; @@ -234,7 +235,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index d285171d4b69..47855af25f3b 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct ring *r; + __u64 mmap_sz; void *tmp; int err; @@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", @@ -128,9 +128,13 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, /* Map read-only producer page and data pages. We map twice as big * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. - * */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + */ + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + return libbpf_err(-E2BIG); + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; ringbuf_unmap_ring(rb, r); @@ -220,7 +224,7 @@ static inline int roundup_len(__u32 len) return (len + 7) / 8 * 8; } -static int64_t ringbuf_process_ring(struct ring* r) +static int64_t ringbuf_process_ring(struct ring *r) { int *len_ptr, len, err; /* 64-bit to avoid overflow in case of extreme application behavior */ @@ -348,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) { struct bpf_map_info info; __u32 len = sizeof(info); + __u64 mmap_sz; void *tmp; struct epoll_event *rb_epoll; int err; @@ -384,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) * simple reading and writing of samples that wrap around the end of * the buffer. See the kernel implementation for details. */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, - PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", @@ -476,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) __u64 cons_pos, prod_pos; struct ringbuf_hdr *hdr; + /* The top two bits are used as special flags */ + if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT)) + return errno = E2BIG, NULL; + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in * the kernel. */ diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index ea655318153f..2464bcbd04e0 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -19,19 +19,19 @@ struct strset { struct hashmap *strs_hash; }; -static size_t strset_hash_fn(const void *key, void *ctx) +static size_t strset_hash_fn(long key, void *ctx) { const struct strset *s = ctx; - const char *str = s->strs_data + (long)key; + const char *str = s->strs_data + key; return str_hash(str); } -static bool strset_equal_fn(const void *key1, const void *key2, void *ctx) +static bool strset_equal_fn(long key1, long key2, void *ctx) { const struct strset *s = ctx; - const char *str1 = s->strs_data + (long)key1; - const char *str2 = s->strs_data + (long)key2; + const char *str1 = s->strs_data + key1; + const char *str2 = s->strs_data + key2; return strcmp(str1, str2) == 0; } @@ -67,7 +67,7 @@ struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t ini /* hashmap__add() returns EEXIST if string with the same * content already is in the hash map */ - err = hashmap__add(hash, (void *)off, (void *)off); + err = hashmap__add(hash, off, off); if (err == -EEXIST) continue; /* duplicate */ if (err) @@ -127,7 +127,7 @@ int strset__find_str(struct strset *set, const char *s) new_off = set->strs_data_len; memcpy(p, s, len); - if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off)) + if (hashmap__find(set->strs_hash, new_off, &old_off)) return old_off; return -ENOENT; @@ -165,8 +165,8 @@ int strset__add_str(struct strset *set, const char *s) * contents doesn't exist already (HASHMAP_ADD strategy). If such * string exists, we'll get its offset in old_off (that's old_key). */ - err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off, - HASHMAP_ADD, (const void **)&old_off, NULL); + err = hashmap__insert(set->strs_hash, new_off, new_off, + HASHMAP_ADD, &old_off, NULL); if (err == -EEXIST) return old_off; /* duplicated string, return existing offset */ if (err) diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index e83b497c2245..75b411fc2c77 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -873,31 +873,27 @@ static void bpf_link_usdt_dealloc(struct bpf_link *link) free(usdt_link); } -static size_t specs_hash_fn(const void *key, void *ctx) +static size_t specs_hash_fn(long key, void *ctx) { - const char *s = key; - - return str_hash(s); + return str_hash((char *)key); } -static bool specs_equal_fn(const void *key1, const void *key2, void *ctx) +static bool specs_equal_fn(long key1, long key2, void *ctx) { - const char *s1 = key1; - const char *s2 = key2; - - return strcmp(s1, s2) == 0; + return strcmp((char *)key1, (char *)key2) == 0; } static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, struct bpf_link_usdt *link, struct usdt_target *target, int *spec_id, bool *is_new) { - void *tmp; + long tmp; + void *new_ids; int err; /* check if we already allocated spec ID for this spec string */ if (hashmap__find(specs_hash, target->spec_str, &tmp)) { - *spec_id = (long)tmp; + *spec_id = tmp; *is_new = false; return 0; } @@ -905,17 +901,17 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash /* otherwise it's a new ID that needs to be set up in specs map and * returned back to usdt_manager when USDT link is detached */ - tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); - if (!tmp) + new_ids = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); + if (!new_ids) return -ENOMEM; - link->spec_ids = tmp; + link->spec_ids = new_ids; /* get next free spec ID, giving preference to free list, if not empty */ if (man->free_spec_cnt) { *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; /* cache spec ID for current spec string for future lookups */ - err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + err = hashmap__add(specs_hash, target->spec_str, *spec_id); if (err) return err; @@ -928,7 +924,7 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash *spec_id = man->next_free_spec_id; /* cache spec ID for current spec string for future lookups */ - err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + err = hashmap__add(specs_hash, target->spec_str, *spec_id); if (err) return err; @@ -1225,26 +1221,32 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { - char *reg_name = NULL; + char reg_name[16]; int arg_sz, len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, ®_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", &arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, ®_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", &arg_sz, reg_name, &len) == 2) { + /* Memory dereference case without offset, e.g., 8@(%rsp) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1348,25 +1350,23 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { - char *reg_name = NULL; + char reg_name[16]; int arg_sz, len, reg_off; long off; - if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, ®_name, &off, &len) == 3) { + if (sscanf(arg_str, " %d @ \[ %15[a-z0-9], %ld ] %n", &arg_sz, reg_name, &off, &len) == 3) { /* Memory dereference case, e.g., -4@[sp, 96] */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, ®_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", &arg_sz, reg_name, &len) == 2) { /* Memory dereference case, e.g., -4@[sp] */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1375,12 +1375,11 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec arg->arg_type = USDT_ARG_CONST; arg->val_off = off; arg->reg_off = 0; - } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, ®_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -8@x4 */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1459,16 +1458,15 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { - char *reg_name = NULL; + char reg_name[16]; int arg_sz, len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, ®_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", &arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -8@-88(s0) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1477,12 +1475,11 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec arg->arg_type = USDT_ARG_CONST; arg->val_off = off; arg->reg_off = 0; - } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, ®_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -8@a1 */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index ee819a402b69..11a1d2d4f681 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -464,9 +464,10 @@ to address dependencies, since the address of a location accessed through a pointer will depend on the value read earlier from that pointer. -Finally, a read event and another memory access event are linked by a -control dependency if the value obtained by the read affects whether -the second event is executed at all. Simple example: +Finally, a read event X and a write event Y are linked by a control +dependency if Y syntactically lies within an arm of an if statement and +X affects the evaluation of the if condition via a data or address +dependency (or similarly for a switch statement). Simple example: int x, y; diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 1c253b4b7ce0..f0943830add7 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -73,6 +73,30 @@ unsigned long arch_jump_destination(struct instruction *insn) return insn->offset + insn->len + insn->immediate; } +bool arch_pc_relative_reloc(struct reloc *reloc) +{ + /* + * All relocation types where P (the address of the target) + * is included in the computation. + */ + switch (reloc->type) { + case R_X86_64_PC8: + case R_X86_64_PC16: + case R_X86_64_PC32: + case R_X86_64_PC64: + + case R_X86_64_PLT32: + case R_X86_64_GOTPC32: + case R_X86_64_GOTPCREL: + return true; + + default: + break; + } + + return false; +} + #define ADD_OP(op) \ if (!(op = calloc(1, sizeof(*op)))) \ return -1; \ diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 24fbe803a0d3..868e3e363786 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -57,12 +57,17 @@ static int parse_hacks(const struct option *opt, const char *str, int unset) found = true; } + if (!str || strstr(str, "skylake")) { + opts.hack_skylake = true; + found = true; + } + return found ? 0 : -1; } const struct option check_options[] = { OPT_GROUP("Actions:"), - OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks), + OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks), OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"), OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"), OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"), @@ -70,10 +75,12 @@ const struct option check_options[] = { OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"), OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"), + OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"), OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"), + OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"), OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump), OPT_GROUP("Options:"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 43ec14c29a60..14130ab86227 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -62,12 +62,12 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, struct instruction *insn) { struct instruction *next = list_next_entry(insn, list); - struct symbol *func = insn->func; + struct symbol *func = insn_func(insn); if (!func) return NULL; - if (&next->list != &file->insn_list && next->func == func) + if (&next->list != &file->insn_list && insn_func(next) == func) return next; /* Check if we're already in the subfunction: */ @@ -83,7 +83,7 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, { struct instruction *prev = list_prev_entry(insn, list); - if (&prev->list != &file->insn_list && prev->func == insn->func) + if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn)) return prev; return NULL; @@ -129,16 +129,13 @@ static bool is_jump_table_jump(struct instruction *insn) static bool is_sibling_call(struct instruction *insn) { /* - * Assume only ELF functions can make sibling calls. This ensures - * sibling call detection consistency between vmlinux.o and individual - * objects. + * Assume only STT_FUNC calls have jump-tables. */ - if (!insn->func) - return false; - - /* An indirect jump is either a sibling call or a jump to a table. */ - if (insn->type == INSN_JUMP_DYNAMIC) - return !is_jump_table_jump(insn); + if (insn_func(insn)) { + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) + return !is_jump_table_jump(insn); + } /* add_jump_destinations() sets insn->call_dest for sibling calls. */ return (is_static_jump(insn) && insn->call_dest); @@ -207,7 +204,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, return false; insn = find_insn(file, func->sec, func->offset); - if (!insn->func) + if (!insn_func(insn)) return false; func_for_each_insn(file, func, insn) { @@ -243,7 +240,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, return false; } - return __dead_end_function(file, dest->func, recursion+1); + return __dead_end_function(file, insn_func(dest), recursion+1); } } @@ -382,6 +379,15 @@ static int decode_instructions(struct objtool_file *file) !strncmp(sec->name, ".text.__x86.", 12)) sec->noinstr = true; + /* + * .init.text code is ran before userspace and thus doesn't + * strictly need retpolines, except for modules which are + * loaded late, they very much do need retpoline in their + * .init.text + */ + if (!strcmp(sec->name, ".init.text") && !opts.module) + sec->init = true; + for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { insn = malloc(sizeof(*insn)); if (!insn) { @@ -418,7 +424,10 @@ static int decode_instructions(struct objtool_file *file) } list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC || func->alias != func) + if (func->type != STT_NOTYPE && func->type != STT_FUNC) + continue; + + if (func->return_thunk || func->alias != func) continue; if (!find_insn(file, sec, func->offset)) { @@ -428,9 +437,11 @@ static int decode_instructions(struct objtool_file *file) } sym_for_each_insn(file, func, insn) { - insn->func = func; - if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) { - if (insn->offset == insn->func->offset) { + insn->sym = func; + if (func->type == STT_FUNC && + insn->type == INSN_ENDBR && + list_empty(&insn->call_node)) { + if (insn->offset == func->offset) { list_add_tail(&insn->call_node, &file->endbr_list); file->nr_endbr++; } else { @@ -850,6 +861,68 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) return 0; } +static int create_cfi_sections(struct objtool_file *file) +{ + struct section *sec, *s; + struct symbol *sym; + unsigned int *loc; + int idx; + + sec = find_section_by_name(file->elf, ".cfi_sites"); + if (sec) { + INIT_LIST_HEAD(&file->call_list); + WARN("file already has .cfi_sites section, skipping"); + return 0; + } + + idx = 0; + for_each_sec(file, s) { + if (!s->text) + continue; + + list_for_each_entry(sym, &s->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + + if (strncmp(sym->name, "__cfi_", 6)) + continue; + + idx++; + } + } + + sec = elf_create_section(file->elf, ".cfi_sites", 0, sizeof(unsigned int), idx); + if (!sec) + return -1; + + idx = 0; + for_each_sec(file, s) { + if (!s->text) + continue; + + list_for_each_entry(sym, &s->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + + if (strncmp(sym->name, "__cfi_", 6)) + continue; + + loc = (unsigned int *)sec->data->d_buf + idx; + memset(loc, 0, sizeof(unsigned int)); + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(unsigned int), + R_X86_64_PC32, + s, sym->offset)) + return -1; + + idx++; + } + } + + return 0; +} + static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -893,6 +966,49 @@ static int create_mcount_loc_sections(struct objtool_file *file) return 0; } +static int create_direct_call_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + unsigned int *loc; + int idx; + + sec = find_section_by_name(file->elf, ".call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->call_list); + WARN("file already has .call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->call_list, call_node) + idx++; + + sec = elf_create_section(file->elf, ".call_sites", 0, sizeof(unsigned int), idx); + if (!sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->call_list, call_node) { + + loc = (unsigned int *)sec->data->d_buf + idx; + memset(loc, 0, sizeof(unsigned int)); + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(unsigned int), + R_X86_64_PC32, + insn->sec, insn->offset)) + return -1; + + idx++; + } + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -999,6 +1115,16 @@ static const char *uaccess_safe_builtin[] = { "__tsan_read_write4", "__tsan_read_write8", "__tsan_read_write16", + "__tsan_volatile_read1", + "__tsan_volatile_read2", + "__tsan_volatile_read4", + "__tsan_volatile_read8", + "__tsan_volatile_read16", + "__tsan_volatile_write1", + "__tsan_volatile_write2", + "__tsan_volatile_write4", + "__tsan_volatile_write8", + "__tsan_volatile_write16", "__tsan_atomic8_load", "__tsan_atomic16_load", "__tsan_atomic32_load", @@ -1270,6 +1396,9 @@ static void annotate_call_site(struct objtool_file *file, return; } + if (insn->type == INSN_CALL && !insn->sec->init) + list_add_tail(&insn->call_node, &file->call_list); + if (!sibling && dead_end_function(file, sym)) insn->dead_end = true; } @@ -1340,21 +1469,18 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn, list_add_tail(&insn->call_node, &file->return_thunk_list); } -static bool same_function(struct instruction *insn1, struct instruction *insn2) +static bool is_first_func_insn(struct objtool_file *file, + struct instruction *insn, struct symbol *sym) { - return insn1->func->pfunc == insn2->func->pfunc; -} - -static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn) -{ - if (insn->offset == insn->func->offset) + if (insn->offset == sym->offset) return true; + /* Allow direct CALL/JMP past ENDBR */ if (opts.ibt) { struct instruction *prev = prev_insn_same_sym(file, insn); if (prev && prev->type == INSN_ENDBR && - insn->offset == insn->func->offset + prev->len) + insn->offset == sym->offset + prev->len) return true; } @@ -1362,6 +1488,32 @@ static bool is_first_func_insn(struct objtool_file *file, struct instruction *in } /* + * A sibling call is a tail-call to another symbol -- to differentiate from a + * recursive tail-call which is to the same symbol. + */ +static bool jump_is_sibling_call(struct objtool_file *file, + struct instruction *from, struct instruction *to) +{ + struct symbol *fs = from->sym; + struct symbol *ts = to->sym; + + /* Not a sibling call if from/to a symbol hole */ + if (!fs || !ts) + return false; + + /* Not a sibling call if not targeting the start of a symbol. */ + if (!is_first_func_insn(file, to, ts)) + return false; + + /* Disallow sibling calls into STT_NOTYPE */ + if (ts->type == STT_NOTYPE) + return false; + + /* Must not be self to be a sibling */ + return fs->pfunc != ts->pfunc; +} + +/* * Find the destination instructions for all jumps. */ static int add_jump_destinations(struct objtool_file *file) @@ -1395,7 +1547,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->return_thunk) { add_return_call(file, insn, true); continue; - } else if (insn->func) { + } else if (insn_func(insn)) { /* * External sibling call or internal sibling call with * STT_FUNC reloc. @@ -1437,8 +1589,8 @@ static int add_jump_destinations(struct objtool_file *file) /* * Cross-function jump. */ - if (insn->func && jump_dest->func && - insn->func != jump_dest->func) { + if (insn_func(insn) && insn_func(jump_dest) && + insn_func(insn) != insn_func(jump_dest)) { /* * For GCC 8+, create parent/child links for any cold @@ -1455,22 +1607,22 @@ static int add_jump_destinations(struct objtool_file *file) * case where the parent function's only reference to a * subfunction is through a jump table. */ - if (!strstr(insn->func->name, ".cold") && - strstr(jump_dest->func->name, ".cold")) { - insn->func->cfunc = jump_dest->func; - jump_dest->func->pfunc = insn->func; - - } else if (!same_function(insn, jump_dest) && - is_first_func_insn(file, jump_dest)) { - /* - * Internal sibling call without reloc or with - * STT_SECTION reloc. - */ - add_call_dest(file, insn, jump_dest->func, true); - continue; + if (!strstr(insn_func(insn)->name, ".cold") && + strstr(insn_func(jump_dest)->name, ".cold")) { + insn_func(insn)->cfunc = insn_func(jump_dest); + insn_func(jump_dest)->pfunc = insn_func(insn); } } + if (jump_is_sibling_call(file, insn, jump_dest)) { + /* + * Internal sibling call without reloc or with + * STT_SECTION reloc. + */ + add_call_dest(file, insn, insn_func(jump_dest), true); + continue; + } + insn->jump_dest = jump_dest; } @@ -1517,7 +1669,7 @@ static int add_call_destinations(struct objtool_file *file) return -1; } - if (insn->func && insn->call_dest->type != STT_FUNC) { + if (insn_func(insn) && insn->call_dest->type != STT_FUNC) { WARN_FUNC("unsupported call to non-function", insn->sec, insn->offset); return -1; @@ -1613,7 +1765,7 @@ static int handle_group_alt(struct objtool_file *file, nop->offset = special_alt->new_off + special_alt->new_len; nop->len = special_alt->orig_len - special_alt->new_len; nop->type = INSN_NOP; - nop->func = orig_insn->func; + nop->sym = orig_insn->sym; nop->alt_group = new_alt_group; nop->ignore = orig_insn->ignore_alts; } @@ -1633,7 +1785,7 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; insn->ignore = orig_insn->ignore_alts; - insn->func = orig_insn->func; + insn->sym = orig_insn->sym; insn->alt_group = new_alt_group; /* @@ -1645,7 +1797,7 @@ static int handle_group_alt(struct objtool_file *file, * accordingly. */ alt_reloc = insn_reloc(file, insn); - if (alt_reloc && + if (alt_reloc && arch_pc_relative_reloc(alt_reloc) && !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { WARN_FUNC("unsupported relocation in alternatives section", @@ -1827,7 +1979,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, struct reloc *reloc = table; struct instruction *dest_insn; struct alternative *alt; - struct symbol *pfunc = insn->func->pfunc; + struct symbol *pfunc = insn_func(insn)->pfunc; unsigned int prev_offset = 0; /* @@ -1854,7 +2006,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, break; /* Make sure the destination is in the same function: */ - if (!dest_insn->func || dest_insn->func->pfunc != pfunc) + if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc) break; alt = malloc(sizeof(*alt)); @@ -1894,7 +2046,7 @@ static struct reloc *find_jump_table(struct objtool_file *file, * it. */ for (; - insn && insn->func && insn->func->pfunc == func; + insn && insn_func(insn) && insn_func(insn)->pfunc == func; insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) { if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) @@ -1911,7 +2063,7 @@ static struct reloc *find_jump_table(struct objtool_file *file, if (!table_reloc) continue; dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend); - if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) + if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func) continue; return table_reloc; @@ -2360,6 +2512,13 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be before add_{jump_call}_destination. + */ + ret = classify_symbols(file); + if (ret) + return ret; + ret = decode_instructions(file); if (ret) return ret; @@ -2379,13 +2538,6 @@ static int decode_sections(struct objtool_file *file) return ret; /* - * Must be before add_{jump_call}_destination. - */ - ret = classify_symbols(file); - if (ret) - return ret; - - /* * Must be before add_jump_destinations(), which depends on 'func' * being set for alternatives, to enable proper sibling call detection. */ @@ -2593,7 +2745,7 @@ static int update_cfi_state(struct instruction *insn, /* stack operations don't make sense with an undefined CFA */ if (cfa->base == CFI_UNDEFINED) { - if (insn->func) { + if (insn_func(insn)) { WARN_FUNC("undefined stack state", insn->sec, insn->offset); return -1; } @@ -2939,7 +3091,7 @@ static int update_cfi_state(struct instruction *insn, } /* detect when asm code uses rbp as a scratch register */ - if (opts.stackval && insn->func && op->src.reg == CFI_BP && + if (opts.stackval && insn_func(insn) && op->src.reg == CFI_BP && cfa->base != CFI_BP) cfi->bp_scratch = true; break; @@ -3249,7 +3401,7 @@ static int validate_sibling_call(struct objtool_file *file, struct instruction *insn, struct insn_state *state) { - if (has_modified_stack_frame(insn, state)) { + if (insn_func(insn) && has_modified_stack_frame(insn, state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", insn->sec, insn->offset); return 1; @@ -3335,13 +3487,14 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, while (1) { next_insn = next_insn_to_validate(file, insn); - if (func && insn->func && func != insn->func->pfunc) { + if (func && insn_func(insn) && func != insn_func(insn)->pfunc) { /* Ignore KCFI type preambles, which always fall through */ - if (!strncmp(func->name, "__cfi_", 6)) + if (!strncmp(func->name, "__cfi_", 6) || + !strncmp(func->name, "__pfx_", 6)) return 0; WARN("%s() falls through to next function %s()", - func->name, insn->func->name); + func->name, insn_func(insn)->name); return 1; } @@ -3583,7 +3736,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { if (insn->hint && !insn->visited && !insn->ignore) { - ret = validate_branch(file, insn->func, insn, state); + ret = validate_branch(file, insn_func(insn), insn, state); if (ret && opts.backtrace) BT_FUNC("<=== (hint)", insn); warnings += ret; @@ -3748,13 +3901,7 @@ static int validate_retpoline(struct objtool_file *file) if (insn->retpoline_safe) continue; - /* - * .init.text code is ran before userspace and thus doesn't - * strictly need retpolines, except for modules which are - * loaded late, they very much do need retpoline in their - * .init.text - */ - if (!strcmp(insn->sec->name, ".init.text") && !opts.module) + if (insn->sec->init) continue; if (insn->type == INSN_RETURN) { @@ -3812,7 +3959,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio * In this case we'll find a piece of code (whole function) that is not * covered by a !section symbol. Ignore them. */ - if (opts.link && !insn->func) { + if (opts.link && !insn_func(insn)) { int size = find_symbol_hole_containing(insn->sec, insn->offset); unsigned long end = insn->offset + size; @@ -3836,10 +3983,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio /* * If this hole jumps to a .cold function, mark it ignore too. */ - if (insn->jump_dest && insn->jump_dest->func && - strstr(insn->jump_dest->func->name, ".cold")) { + if (insn->jump_dest && insn_func(insn->jump_dest) && + strstr(insn_func(insn->jump_dest)->name, ".cold")) { struct instruction *dest = insn->jump_dest; - func_for_each_insn(file, dest->func, dest) + func_for_each_insn(file, insn_func(dest), dest) dest->ignore = true; } } @@ -3847,10 +3994,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return false; } - if (!insn->func) + if (!insn_func(insn)) return false; - if (insn->func->static_call_tramp) + if (insn_func(insn)->static_call_tramp) return true; /* @@ -3881,7 +4028,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio if (insn->type == INSN_JUMP_UNCONDITIONAL) { if (insn->jump_dest && - insn->jump_dest->func == insn->func) { + insn_func(insn->jump_dest) == insn_func(insn)) { insn = insn->jump_dest; continue; } @@ -3889,7 +4036,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio break; } - if (insn->offset + insn->len >= insn->func->offset + insn->func->len) + if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len) break; insn = list_next_entry(insn, list); @@ -3898,6 +4045,54 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return false; } +static int add_prefix_symbol(struct objtool_file *file, struct symbol *func, + struct instruction *insn) +{ + if (!opts.prefix) + return 0; + + for (;;) { + struct instruction *prev = list_prev_entry(insn, list); + u64 offset; + + if (&prev->list == &file->insn_list) + break; + + if (prev->type != INSN_NOP) + break; + + offset = func->offset - prev->offset; + if (offset >= opts.prefix) { + if (offset == opts.prefix) { + /* + * Since the sec->symbol_list is ordered by + * offset (see elf_add_symbol()) the added + * symbol will not be seen by the iteration in + * validate_section(). + * + * Hence the lack of list_for_each_entry_safe() + * there. + * + * The direct concequence is that prefix symbols + * don't get visited (because pointless), except + * for the logic in ignore_unreachable_insn() + * that needs the terminating insn to be visited + * otherwise it will report the hole. + * + * Hence mark the first instruction of the + * prefix symbol as visisted. + */ + prev->visited |= VISITED_BRANCH; + elf_create_prefix_symbol(file->elf, func, opts.prefix); + } + break; + } + insn = prev; + } + + return 0; +} + static int validate_symbol(struct objtool_file *file, struct section *sec, struct symbol *sym, struct insn_state *state) { @@ -3916,9 +4111,11 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, if (!insn || insn->ignore || insn->visited) return 0; + add_prefix_symbol(file, sym, insn); + state->uaccess = sym->uaccess_safe; - ret = validate_branch(file, insn->func, insn, *state); + ret = validate_branch(file, insn_func(insn), insn, *state); if (ret && opts.backtrace) BT_FUNC("<=== (sym)", insn); return ret; @@ -3984,6 +4181,24 @@ static void mark_endbr_used(struct instruction *insn) list_del_init(&insn->call_node); } +static bool noendbr_range(struct objtool_file *file, struct instruction *insn) +{ + struct symbol *sym = find_symbol_containing(insn->sec, insn->offset-1); + struct instruction *first; + + if (!sym) + return false; + + first = find_insn(file, sym->sec, sym->offset); + if (!first) + return false; + + if (first->type != INSN_ENDBR && !first->noendbr) + return false; + + return insn->offset == sym->offset + sym->len; +} + static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn) { struct instruction *dest; @@ -4037,7 +4252,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn continue; } - if (dest->func && dest->func == insn->func) { + if (insn_func(dest) && insn_func(dest) == insn_func(insn)) { /* * Anything from->to self is either _THIS_IP_ or * IRET-to-self. @@ -4056,9 +4271,19 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn continue; } + /* + * Accept anything ANNOTATE_NOENDBR. + */ if (dest->noendbr) continue; + /* + * Accept if this is the instruction after a symbol + * that is (no)endbr -- typical code-range usage. + */ + if (noendbr_range(file, dest)) + continue; + WARN_FUNC("relocation to !ENDBR: %s", insn->sec, insn->offset, offstr(dest->sec, dest->offset)); @@ -4297,11 +4522,25 @@ int check(struct objtool_file *file) warnings += ret; } + if (opts.cfi) { + ret = create_cfi_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (opts.rethunk) { ret = create_return_sites_sections(file); if (ret < 0) goto out; warnings += ret; + + if (opts.hack_skylake) { + ret = create_direct_call_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } } if (opts.mcount) { diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 7e24b09b1163..8cd7f018002c 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -16,6 +16,7 @@ #include <string.h> #include <unistd.h> #include <errno.h> +#include <linux/interval_tree_generic.h> #include <objtool/builtin.h> #include <objtool/elf.h> @@ -50,38 +51,22 @@ static inline u32 str_hash(const char *str) __elf_table(name); \ }) -static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b) +static inline unsigned long __sym_start(struct symbol *s) { - struct symbol *sa = rb_entry(a, struct symbol, node); - struct symbol *sb = rb_entry(b, struct symbol, node); - - if (sa->offset < sb->offset) - return true; - if (sa->offset > sb->offset) - return false; - - if (sa->len < sb->len) - return true; - if (sa->len > sb->len) - return false; - - sa->alias = sb; - - return false; + return s->offset; } -static int symbol_by_offset(const void *key, const struct rb_node *node) +static inline unsigned long __sym_last(struct symbol *s) { - const struct symbol *s = rb_entry(node, struct symbol, node); - const unsigned long *o = key; + return s->offset + s->len - 1; +} - if (*o < s->offset) - return -1; - if (*o >= s->offset + s->len) - return 1; +INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last, + __sym_start, __sym_last, static, __sym) - return 0; -} +#define __sym_for_each(_iter, _tree, _start, _end) \ + for (_iter = __sym_iter_first((_tree), (_start), (_end)); \ + _iter; _iter = __sym_iter_next(_iter, (_start), (_end))) struct symbol_hole { unsigned long key; @@ -147,13 +132,12 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) { - struct rb_node *node; - - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - if (s->offset == offset && s->type != STT_SECTION) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->offset == offset && iter->type != STT_SECTION) + return iter; } return NULL; @@ -161,13 +145,12 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) { - struct rb_node *node; + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); - - if (s->offset == offset && s->type == STT_FUNC) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->offset == offset && iter->type == STT_FUNC) + return iter; } return NULL; @@ -175,13 +158,12 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset) { - struct rb_node *node; - - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - if (s->type != STT_SECTION) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->type != STT_SECTION) + return iter; } return NULL; @@ -202,7 +184,7 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) /* * Find the rightmost symbol for which @offset is after it. */ - n = rb_find(&hole, &sec->symbol_tree, symbol_hole_by_offset); + n = rb_find(&hole, &sec->symbol_tree.rb_root, symbol_hole_by_offset); /* found a symbol that contains @offset */ if (n) @@ -224,13 +206,12 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) struct symbol *find_func_containing(struct section *sec, unsigned long offset) { - struct rb_node *node; - - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { - struct symbol *s = rb_entry(node, struct symbol, node); + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; + struct symbol *iter; - if (s->type == STT_FUNC) - return s; + __sym_for_each(iter, tree, offset, offset) { + if (iter->type == STT_FUNC) + return iter; } return NULL; @@ -373,7 +354,9 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) { struct list_head *entry; struct rb_node *pnode; + struct symbol *iter; + INIT_LIST_HEAD(&sym->reloc_list); INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; @@ -386,7 +369,12 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) sym->offset = sym->sym.st_value; sym->len = sym->sym.st_size; - rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); + __sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) { + if (iter->offset == sym->offset && iter->type == sym->type) + iter->alias = sym; + } + + __sym_insert(sym, &sym->sec->symbol_tree); pnode = rb_prev(&sym->node); if (pnode) entry = &rb_entry(pnode, struct symbol, node)->list; @@ -401,7 +389,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) * can exist within a function, confusing the sorting. */ if (!sym->len) - rb_erase(&sym->node, &sym->sec->symbol_tree); + __sym_remove(sym, &sym->sec->symbol_tree); } static int read_symbols(struct elf *elf) @@ -570,6 +558,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, reloc->sym = sym; reloc->addend = addend; + list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list); list_add_tail(&reloc->list, &sec->reloc->reloc_list); elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); @@ -586,21 +575,10 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, */ static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym) { - struct section *sec; - - list_for_each_entry(sec, &elf->sections, list) { - struct reloc *reloc; - - if (sec->changed) - continue; + struct reloc *reloc; - list_for_each_entry(reloc, &sec->reloc_list, list) { - if (reloc->sym == sym) { - sec->changed = true; - break; - } - } - } + list_for_each_entry(reloc, &sym->reloc_list, sym_reloc_entry) + reloc->sec->changed = true; } /* @@ -647,6 +625,12 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, /* end-of-list */ if (!symtab_data) { + /* + * Over-allocate to avoid O(n^2) symbol creation + * behaviour. The down side is that libelf doesn't + * like this; see elf_truncate_section() for the fixup. + */ + int num = max(1U, sym->idx/3); void *buf; if (idx) { @@ -660,28 +644,34 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, if (t) shndx_data = elf_newdata(t); - buf = calloc(1, entsize); + buf = calloc(num, entsize); if (!buf) { WARN("malloc"); return -1; } symtab_data->d_buf = buf; - symtab_data->d_size = entsize; + symtab_data->d_size = num * entsize; symtab_data->d_align = 1; symtab_data->d_type = ELF_T_SYM; - symtab->sh.sh_size += entsize; symtab->changed = true; + symtab->truncate = true; if (t) { - shndx_data->d_buf = &sym->sec->idx; - shndx_data->d_size = sizeof(Elf32_Word); + buf = calloc(num, sizeof(Elf32_Word)); + if (!buf) { + WARN("malloc"); + return -1; + } + + shndx_data->d_buf = buf; + shndx_data->d_size = num * sizeof(Elf32_Word); shndx_data->d_align = sizeof(Elf32_Word); shndx_data->d_type = ELF_T_WORD; - symtab_shndx->sh.sh_size += sizeof(Elf32_Word); symtab_shndx->changed = true; + symtab_shndx->truncate = true; } break; @@ -730,11 +720,11 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, } static struct symbol * -elf_create_section_symbol(struct elf *elf, struct section *sec) +__elf_create_symbol(struct elf *elf, struct symbol *sym) { struct section *symtab, *symtab_shndx; Elf32_Word first_non_local, new_idx; - struct symbol *sym, *old; + struct symbol *old; symtab = find_section_by_name(elf, ".symtab"); if (symtab) { @@ -744,27 +734,16 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) return NULL; } - sym = calloc(1, sizeof(*sym)); - if (!sym) { - perror("malloc"); - return NULL; - } - - sym->name = sec->name; - sym->sec = sec; + new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; - // st_name 0 - sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); - // st_other 0 - // st_value 0 - // st_size 0 + if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL) + goto non_local; /* * Move the first global symbol, as per sh_info, into a new, higher * symbol index. This fees up a spot for a new local symbol. */ first_non_local = symtab->sh.sh_info; - new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; old = find_symbol_by_index(elf, first_non_local); if (old) { old->idx = new_idx; @@ -782,18 +761,82 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) new_idx = first_non_local; } + /* + * Either way, we will add a LOCAL symbol. + */ + symtab->sh.sh_info += 1; + +non_local: sym->idx = new_idx; if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) { WARN("elf_update_symbol"); return NULL; } - /* - * Either way, we added a LOCAL symbol. - */ - symtab->sh.sh_info += 1; + symtab->sh.sh_size += symtab->sh.sh_entsize; + symtab->changed = true; - elf_add_symbol(elf, sym); + if (symtab_shndx) { + symtab_shndx->sh.sh_size += sizeof(Elf32_Word); + symtab_shndx->changed = true; + } + + return sym; +} + +static struct symbol * +elf_create_section_symbol(struct elf *elf, struct section *sec) +{ + struct symbol *sym = calloc(1, sizeof(*sym)); + + if (!sym) { + perror("malloc"); + return NULL; + } + + sym->name = sec->name; + sym->sec = sec; + + // st_name 0 + sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); + // st_other 0 + // st_value 0 + // st_size 0 + + sym = __elf_create_symbol(elf, sym); + if (sym) + elf_add_symbol(elf, sym); + + return sym; +} + +static int elf_add_string(struct elf *elf, struct section *strtab, char *str); + +struct symbol * +elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size) +{ + struct symbol *sym = calloc(1, sizeof(*sym)); + size_t namelen = strlen(orig->name) + sizeof("__pfx_"); + char *name = malloc(namelen); + + if (!sym || !name) { + perror("malloc"); + return NULL; + } + + snprintf(name, namelen, "__pfx_%s", orig->name); + + sym->name = name; + sym->sec = orig->sec; + + sym->sym.st_name = elf_add_string(elf, NULL, name); + sym->sym.st_info = orig->sym.st_info; + sym->sym.st_value = orig->sym.st_value - size; + sym->sym.st_size = size; + + sym = __elf_create_symbol(elf, sym); + if (sym) + elf_add_symbol(elf, sym); return sym; } @@ -850,11 +893,12 @@ static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsi static int read_relocs(struct elf *elf) { + unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; struct section *sec; struct reloc *reloc; - int i; unsigned int symndx; - unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; + struct symbol *sym; + int i; if (!elf_alloc_hash(reloc, elf->text_size / 16)) return -1; @@ -895,13 +939,14 @@ static int read_relocs(struct elf *elf) reloc->sec = sec; reloc->idx = i; - reloc->sym = find_symbol_by_index(elf, symndx); + reloc->sym = sym = find_symbol_by_index(elf, symndx); if (!reloc->sym) { WARN("can't find reloc entry symbol %d for %s", symndx, sec->name); return -1; } + list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list); list_add_tail(&reloc->list, &sec->reloc_list); elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); @@ -1285,6 +1330,60 @@ int elf_write_reloc(struct elf *elf, struct reloc *reloc) return 0; } +/* + * When Elf_Scn::sh_size is smaller than the combined Elf_Data::d_size + * do you: + * + * A) adhere to the section header and truncate the data, or + * B) ignore the section header and write out all the data you've got? + * + * Yes, libelf sucks and we need to manually truncate if we over-allocate data. + */ +static int elf_truncate_section(struct elf *elf, struct section *sec) +{ + u64 size = sec->sh.sh_size; + bool truncated = false; + Elf_Data *data = NULL; + Elf_Scn *s; + + s = elf_getscn(elf->elf, sec->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + for (;;) { + /* get next data descriptor for the relevant section */ + data = elf_getdata(s, data); + + if (!data) { + if (size) { + WARN("end of section data but non-zero size left\n"); + return -1; + } + return 0; + } + + if (truncated) { + /* when we remove symbols */ + WARN("truncated; but more data\n"); + return -1; + } + + if (!data->d_size) { + WARN("zero size data"); + return -1; + } + + if (data->d_size > size) { + truncated = true; + data->d_size = size; + } + + size -= data->d_size; + } +} + int elf_write(struct elf *elf) { struct section *sec; @@ -1295,6 +1394,9 @@ int elf_write(struct elf *elf) /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, &elf->sections, list) { + if (sec->truncate) + elf_truncate_section(elf, sec); + if (sec->changed) { s = elf_getscn(elf->elf, sec->idx); if (!s) { diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index beb2f3aa94ff..fe2ea4b892c3 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -93,4 +93,6 @@ bool arch_is_rethunk(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); +bool arch_pc_relative_reloc(struct reloc *reloc); + #endif /* _ARCH_H */ diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 42a52f1a0add..c44ff39df80c 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -14,6 +14,7 @@ struct opts { bool dump_orc; bool hack_jump_label; bool hack_noinstr; + bool hack_skylake; bool ibt; bool mcount; bool noinstr; @@ -25,6 +26,8 @@ struct opts { bool stackval; bool static_call; bool uaccess; + int prefix; + bool cfi; /* options: */ bool backtrace; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 036129cebeee..acd7fae59348 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -67,11 +67,21 @@ struct instruction { struct reloc *jump_table; struct reloc *reloc; struct list_head alts; - struct symbol *func; + struct symbol *sym; struct list_head stack_ops; struct cfi_state *cfi; }; +static inline struct symbol *insn_func(struct instruction *insn) +{ + struct symbol *sym = insn->sym; + + if (sym && sym->type != STT_FUNC) + sym = NULL; + + return sym; +} + #define VISITED_BRANCH 0x01 #define VISITED_BRANCH_UACCESS 0x02 #define VISITED_BRANCH_MASK 0x03 diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 16f4067b82ae..bca719b2104b 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -30,7 +30,7 @@ struct section { struct hlist_node hash; struct hlist_node name_hash; GElf_Shdr sh; - struct rb_root symbol_tree; + struct rb_root_cached symbol_tree; struct list_head symbol_list; struct list_head reloc_list; struct section *base, *reloc; @@ -38,7 +38,7 @@ struct section { Elf_Data *data; char *name; int idx; - bool changed, text, rodata, noinstr; + bool changed, text, rodata, noinstr, init, truncate; }; struct symbol { @@ -53,6 +53,7 @@ struct symbol { unsigned char bind, type; unsigned long offset; unsigned int len; + unsigned long __subtree_last; struct symbol *pfunc, *cfunc, *alias; u8 uaccess_safe : 1; u8 static_call_tramp : 1; @@ -61,6 +62,7 @@ struct symbol { u8 fentry : 1; u8 profiling_func : 1; struct list_head pv_target; + struct list_head reloc_list; }; struct reloc { @@ -72,6 +74,7 @@ struct reloc { }; struct section *sec; struct symbol *sym; + struct list_head sym_reloc_entry; unsigned long offset; unsigned int type; s64 addend; @@ -145,6 +148,8 @@ static inline bool has_multiple_files(struct elf *elf) struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); +struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size); + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct symbol *sym, s64 addend); int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index 7f2d1b095333..6b40977bcdb1 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -28,6 +28,7 @@ struct objtool_file { struct list_head static_call_list; struct list_head mcount_loc_list; struct list_head endbr_list; + struct list_head call_list; bool ignore_unreachables, hints, rodata; unsigned int nr_endbr; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index a7ecc32e3512..6affd8067f83 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -106,6 +106,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); INIT_LIST_HEAD(&file.endbr_list); + INIT_LIST_HEAD(&file.call_list); file.ignore_unreachables = opts.no_unreachable; file.hints = false; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index b6667501ebb4..a9eb1ed6bd63 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -131,12 +131,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u expr__find_ids("FOO + BAR + BAZ + BOZO", "FOO", ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 3); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", - (void **)&val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", &val_ptr)); expr__ctx_clear(ctx); ctx->sctx.runtime = 3; @@ -144,20 +141,16 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", - (void **)&val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", &val_ptr)); expr__ctx_clear(ctx); TEST_ASSERT_VAL("find ids", expr__find_ids("dash\\-event1 - dash\\-event2", NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2", - (void **)&val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2", &val_ptr)); /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */ { @@ -175,7 +168,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, smton ? "EVENT1" : "EVENT2", - (void **)&val_ptr)); + &val_ptr)); expr__ctx_clear(ctx); TEST_ASSERT_VAL("find ids", @@ -184,7 +177,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, corewide ? "EVENT1" : "EVENT2", - (void **)&val_ptr)); + &val_ptr)); } /* The expression is a constant 1.0 without needing to evaluate EVENT1. */ @@ -221,8 +214,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u expr__find_ids("source_count(EVENT1)", NULL, ctx) == 0); TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1); - TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", - (void **)&val_ptr)); + TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", &val_ptr)); expr__ctx_free(ctx); diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index f7b9dbbad97f..e36d8b1610d4 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -987,10 +987,10 @@ static int metric_parse_fake(const char *str) */ i = 1; hashmap__for_each_entry(ctx->ids, cur, bkt) - expr__add_id_val(ctx, strdup(cur->key), i++); + expr__add_id_val(ctx, strdup(cur->pkey), i++); hashmap__for_each_entry(ctx->ids, cur, bkt) { - if (check_parse_fake(cur->key)) { + if (check_parse_fake(cur->pkey)) { pr_err("check_parse_fake failed\n"); goto out; } @@ -1004,7 +1004,7 @@ static int metric_parse_fake(const char *str) */ i = 1024; hashmap__for_each_entry(ctx->ids, cur, bkt) - expr__add_id_val(ctx, strdup(cur->key), i--); + expr__add_id_val(ctx, strdup(cur->pkey), i--); if (expr__parse(&result, ctx, str)) { pr_err("expr__parse failed\n"); ret = -1; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index b3c8174360bf..6e9b06cf06ee 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -314,7 +314,7 @@ static void bpf_program_hash_free(void) return; hashmap__for_each_entry(bpf_program_hash, cur, bkt) - clear_prog_priv(cur->key, cur->value); + clear_prog_priv(cur->pkey, cur->pvalue); hashmap__free(bpf_program_hash); bpf_program_hash = NULL; @@ -335,13 +335,12 @@ void bpf__clear(void) bpf_map_hash_free(); } -static size_t ptr_hash(const void *__key, void *ctx __maybe_unused) +static size_t ptr_hash(const long __key, void *ctx __maybe_unused) { - return (size_t) __key; + return __key; } -static bool ptr_equal(const void *key1, const void *key2, - void *ctx __maybe_unused) +static bool ptr_equal(long key1, long key2, void *ctx __maybe_unused) { return key1 == key2; } @@ -1181,7 +1180,7 @@ static void bpf_map_hash_free(void) return; hashmap__for_each_entry(bpf_map_hash, cur, bkt) - bpf_map_priv__clear(cur->key, cur->value); + bpf_map_priv__clear(cur->pkey, cur->pvalue); hashmap__free(bpf_map_hash); bpf_map_hash = NULL; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 77b2cf5a214e..999dd1700502 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3123,7 +3123,7 @@ void evsel__zero_per_pkg(struct evsel *evsel) if (evsel->per_pkg_mask) { hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt) - free((char *)cur->key); + free((void *)cur->pkey); hashmap__clear(evsel->per_pkg_mask); } diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 140f2acdb325..00dcde35e0d3 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -47,7 +47,7 @@ struct expr_id_data { } kind; }; -static size_t key_hash(const void *key, void *ctx __maybe_unused) +static size_t key_hash(long key, void *ctx __maybe_unused) { const char *str = (const char *)key; size_t hash = 0; @@ -60,8 +60,7 @@ static size_t key_hash(const void *key, void *ctx __maybe_unused) return hash; } -static bool key_equal(const void *key1, const void *key2, - void *ctx __maybe_unused) +static bool key_equal(long key1, long key2, void *ctx __maybe_unused) { return !strcmp((const char *)key1, (const char *)key2); } @@ -85,8 +84,8 @@ void ids__free(struct hashmap *ids) return; hashmap__for_each_entry(ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); + free((void *)cur->pkey); + free((void *)cur->pvalue); } hashmap__free(ids); @@ -98,8 +97,7 @@ int ids__insert(struct hashmap *ids, const char *id) char *old_key = NULL; int ret; - ret = hashmap__set(ids, id, data_ptr, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ids, id, data_ptr, &old_key, &old_data); if (ret) free(data_ptr); free(old_key); @@ -128,8 +126,7 @@ struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2) ids2 = tmp; } hashmap__for_each_entry(ids2, cur, bkt) { - ret = hashmap__set(ids1, cur->key, cur->value, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ids1, cur->key, cur->value, &old_key, &old_data); free(old_key); free(old_data); @@ -170,8 +167,7 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, data_ptr->val.source_count = source_count; data_ptr->kind = EXPR_ID_DATA__VALUE; - ret = hashmap__set(ctx->ids, id, data_ptr, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data); if (ret) free(data_ptr); free(old_key); @@ -206,8 +202,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) data_ptr->ref.metric_expr = ref->metric_expr; data_ptr->kind = EXPR_ID_DATA__REF; - ret = hashmap__set(ctx->ids, name, data_ptr, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ctx->ids, name, data_ptr, &old_key, &old_data); if (ret) free(data_ptr); @@ -222,7 +217,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { - return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1; + return hashmap__find(ctx->ids, id, data) ? 0 : -1; } bool expr__subset_of_ids(struct expr_parse_ctx *haystack, @@ -233,7 +228,7 @@ bool expr__subset_of_ids(struct expr_parse_ctx *haystack, struct expr_id_data *data; hashmap__for_each_entry(needles->ids, cur, bkt) { - if (expr__get_id(haystack, cur->key, &data)) + if (expr__get_id(haystack, cur->pkey, &data)) return false; } return true; @@ -283,8 +278,7 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id) struct expr_id_data *old_val = NULL; char *old_key = NULL; - hashmap__delete(ctx->ids, id, - (const void **)&old_key, (void **)&old_val); + hashmap__delete(ctx->ids, id, &old_key, &old_val); free(old_key); free(old_val); } @@ -315,8 +309,8 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx) size_t bkt; hashmap__for_each_entry(ctx->ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); + free((void *)cur->pkey); + free(cur->pvalue); } hashmap__clear(ctx->ids); } @@ -331,8 +325,8 @@ void expr__ctx_free(struct expr_parse_ctx *ctx) free(ctx->sctx.user_requested_cpu_list); hashmap__for_each_entry(ctx->ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); + free((void *)cur->pkey); + free(cur->pvalue); } hashmap__free(ctx->ids); free(ctx); diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c index aeb09c288716..140ee4055676 100644 --- a/tools/perf/util/hashmap.c +++ b/tools/perf/util/hashmap.c @@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map) } static bool hashmap_find_entry(const struct hashmap *map, - const void *key, size_t hash, + const long key, size_t hash, struct hashmap_entry ***pprev, struct hashmap_entry **entry) { @@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map, return false; } -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value) +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value) { struct hashmap_entry *entry; size_t h; int err; if (old_key) - *old_key = NULL; + *old_key = 0; if (old_value) - *old_value = NULL; + *old_value = 0; h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); if (strategy != HASHMAP_APPEND && @@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value, return 0; } -bool hashmap__find(const struct hashmap *map, const void *key, void **value) +bool hashmap_find(const struct hashmap *map, long key, long *value) { struct hashmap_entry *entry; size_t h; @@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value) return true; } -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value) +bool hashmap_delete(struct hashmap *map, long key, + long *old_key, long *old_value) { struct hashmap_entry **pprev, *entry; size_t h; diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index 10a4c4cd13cf..0a5bf1937a7c 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s) return h; } -typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); -typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); +typedef size_t (*hashmap_hash_fn)(long key, void *ctx); +typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx); +/* + * Hashmap interface is polymorphic, keys and values could be either + * long-sized integers or pointers, this is achieved as follows: + * - interface functions that operate on keys and values are hidden + * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert; + * - these auxiliary macros cast the key and value parameters as + * long or long *, so the user does not have to specify the casts explicitly; + * - for pointer parameters (e.g. old_key) the size of the pointed + * type is verified by hashmap_cast_ptr using _Static_assert; + * - when iterating using hashmap__for_each_* forms + * hasmap_entry->key should be used for integer keys and + * hasmap_entry->pkey should be used for pointer keys, + * same goes for values. + */ struct hashmap_entry { - const void *key; - void *value; + union { + long key; + const void *pkey; + }; + union { + long value; + void *pvalue; + }; struct hashmap_entry *next; }; @@ -102,6 +122,13 @@ enum hashmap_insert_strategy { HASHMAP_APPEND, }; +#define hashmap_cast_ptr(p) ({ \ + _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \ + sizeof(*(p)) == sizeof(long), \ + #p " pointee should be a long-sized integer or a pointer"); \ + (long *)(p); \ +}) + /* * hashmap__insert() adds key/value entry w/ various semantics, depending on * provided strategy value. If a given key/value pair replaced already @@ -109,42 +136,38 @@ enum hashmap_insert_strategy { * through old_key and old_value to allow calling code do proper memory * management. */ -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value); +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value); -static inline int hashmap__add(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); -} +#define hashmap__insert(map, key, value, strategy, old_key, old_value) \ + hashmap_insert((map), (long)(key), (long)(value), (strategy), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -static inline int hashmap__set(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_SET, - old_key, old_value); -} +#define hashmap__add(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL) -static inline int hashmap__update(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_UPDATE, - old_key, old_value); -} +#define hashmap__set(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value)) -static inline int hashmap__append(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); -} +#define hashmap__update(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value)) + +#define hashmap__append(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL) + +bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value); + +#define hashmap__delete(map, key, old_key, old_value) \ + hashmap_delete((map), (long)(key), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value); +bool hashmap_find(const struct hashmap *map, long key, long *value); -bool hashmap__find(const struct hashmap *map, const void *key, void **value); +#define hashmap__find(map, key, value) \ + hashmap_find((map), (long)(key), hashmap_cast_ptr(value)) /* * hashmap__for_each_entry - iterate over all entries in hashmap diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6eac7a60ed27..b9c273ed080a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -290,7 +290,7 @@ static int setup_metric_events(struct hashmap *ids, * combined or shared groups, this metric may not care * about this event. */ - if (hashmap__find(ids, metric_id, (void **)&val_ptr)) { + if (hashmap__find(ids, metric_id, &val_ptr)) { metric_events[matched_events++] = ev; if (matched_events >= ids_size) @@ -651,7 +651,7 @@ static int metricgroup__build_event_string(struct strbuf *events, #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { - const char *sep, *rsep, *id = cur->key; + const char *sep, *rsep, *id = cur->pkey; enum perf_tool_event ev; pr_debug("found event %s\n", id); @@ -832,14 +832,14 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_event pe; - if (metricgroup__find_metric(cur->key, table, &pe)) { + if (metricgroup__find_metric(cur->pkey, table, &pe)) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) return -ENOMEM; memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe)); - pending[pending_cnt].key = cur->key; + pending[pending_cnt].key = cur->pkey; pending_cnt++; } } @@ -1320,7 +1320,7 @@ static int build_combined_expr_ctx(const struct list_head *metric_list, list_for_each_entry(m, metric_list, nd) { if (m->has_constraint && !m->modifier) { hashmap__for_each_entry(m->pctx->ids, cur, bkt) { - dup = strdup(cur->key); + dup = strdup(cur->pkey); if (!dup) { ret = -ENOMEM; goto err_out; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 9bde9224a97c..cadb2df23c87 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -399,7 +399,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) i = 0; hashmap__for_each_entry(ctx->ids, cur, bkt) { - const char *metric_name = (const char *)cur->key; + const char *metric_name = cur->pkey; found = false; if (leader) { diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 673f017a211f..534d36d26fc3 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -314,15 +314,14 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist) evsel__copy_prev_raw_counts(evsel); } -static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) +static size_t pkg_id_hash(long __key, void *ctx __maybe_unused) { uint64_t *key = (uint64_t *) __key; return *key & 0xffffffff; } -static bool pkg_id_equal(const void *__key1, const void *__key2, - void *ctx __maybe_unused) +static bool pkg_id_equal(long __key1, long __key2, void *ctx __maybe_unused) { uint64_t *key1 = (uint64_t *) __key1; uint64_t *key2 = (uint64_t *) __key2; @@ -383,11 +382,11 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, return -ENOMEM; *key = (uint64_t)d << 32 | s; - if (hashmap__find(mask, (void *)key, NULL)) { + if (hashmap__find(mask, key, NULL)) { *skip = true; free(key); } else - ret = hashmap__add(mask, (void *)key, (void *)1); + ret = hashmap__add(mask, key, 1); return ret; } diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile index 1208a105a871..886bba6c58cd 100644 --- a/tools/power/acpi/tools/acpidump/Makefile +++ b/tools/power/acpi/tools/acpidump/Makefile @@ -28,6 +28,7 @@ TOOL_OBJS = \ tbxfroot.o\ utascii.o\ utbuffer.o\ + utcksum.o\ utdebug.o\ utexcep.o\ utglobal.o\ diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index d54dde02b87d..ea44b0ed5dcb 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -78,7 +78,9 @@ u8 ap_is_valid_checksum(struct acpi_table_header *table) rsdp = ACPI_CAST_PTR(struct acpi_table_rsdp, table); status = acpi_tb_validate_rsdp(rsdp); } else { - status = acpi_tb_verify_checksum(table, table->length); + /* We don't have to check for a CDAT here, since CDAT is not in the RSDT/XSDT */ + + status = acpi_ut_verify_checksum(table, table->length); } if (ACPI_FAILURE(status)) { diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index e9b6de314654..59bfa05dec5d 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -131,9 +131,10 @@ UTIL_OBJS = utils/helpers/amd.o utils/helpers/msr.o \ utils/idle_monitor/hsw_ext_idle.o \ utils/idle_monitor/amd_fam14h_idle.o utils/idle_monitor/cpuidle_sysfs.o \ utils/idle_monitor/mperf_monitor.o utils/idle_monitor/cpupower-monitor.o \ + utils/idle_monitor/rapl_monitor.o \ utils/cpupower.o utils/cpufreq-info.o utils/cpufreq-set.o \ utils/cpupower-set.o utils/cpupower-info.o utils/cpuidle-info.o \ - utils/cpuidle-set.o + utils/cpuidle-set.o utils/powercap-info.o UTIL_SRC := $(UTIL_OBJS:.o=.c) @@ -143,9 +144,12 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \ utils/helpers/bitmask.h \ utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def -LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h lib/acpi_cppc.h -LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c lib/acpi_cppc.c -LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o lib/acpi_cppc.o +LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h lib/acpi_cppc.h \ + lib/powercap.h +LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c lib/acpi_cppc.c \ + lib/powercap.c +LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o lib/acpi_cppc.o \ + lib/powercap.o LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) override CFLAGS += -pipe @@ -276,6 +280,7 @@ install-lib: libcpupower $(INSTALL) -d $(DESTDIR)${includedir} $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h $(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h + $(INSTALL_DATA) lib/powercap.h $(DESTDIR)${includedir}/powercap.h install-tools: $(OUTPUT)cpupower $(INSTALL) -d $(DESTDIR)${bindir} @@ -292,6 +297,7 @@ install-man: $(INSTALL_DATA) -D man/cpupower-set.1 $(DESTDIR)${mandir}/man1/cpupower-set.1 $(INSTALL_DATA) -D man/cpupower-info.1 $(DESTDIR)${mandir}/man1/cpupower-info.1 $(INSTALL_DATA) -D man/cpupower-monitor.1 $(DESTDIR)${mandir}/man1/cpupower-monitor.1 + $(INSTALL_DATA) -D man/cpupower-powercap-info.1 $(DESTDIR)${mandir}/man1/cpupower-powercap-info.1 install-gmo: create-gmo $(INSTALL) -d $(DESTDIR)${localedir} @@ -321,6 +327,7 @@ uninstall: - rm -f $(DESTDIR)${mandir}/man1/cpupower-set.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-info.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-monitor.1 + - rm -f $(DESTDIR)${mandir}/man1/cpupower-powercap-info.1 - for HLANG in $(LANGUAGES); do \ rm -f $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c new file mode 100644 index 000000000000..0ce29ee4c2e4 --- /dev/null +++ b/tools/power/cpupower/lib/powercap.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (C) 2016 SUSE Software Solutions GmbH + * Thomas Renninger <trenn@suse.de> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <stdio.h> +#include <dirent.h> + +#include "powercap.h" + +static unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) +{ + int fd; + ssize_t numread; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +static int sysfs_get_enabled(char *path, int *mode) +{ + int fd; + char yes_no; + + *mode = 0; + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + if (read(fd, &yes_no, 1) != 1) { + close(fd); + return -1; + } + + if (yes_no == '1') { + *mode = 1; + return 0; + } else if (yes_no == '0') { + return 0; + } + return -1; +} + +int powercap_get_enabled(int *mode) +{ + char path[SYSFS_PATH_MAX] = PATH_TO_POWERCAP "/intel-rapl/enabled"; + + return sysfs_get_enabled(path, mode); +} + +/* + * Hardcoded, because rapl is the only powercap implementation +- * this needs to get more generic if more powercap implementations + * should show up + */ +int powercap_get_driver(char *driver, int buflen) +{ + char file[SYSFS_PATH_MAX] = PATH_TO_RAPL; + + struct stat statbuf; + + if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) { + driver = ""; + return -1; + } else if (buflen > 10) { + strcpy(driver, "intel-rapl"); + return 0; + } else + return -1; +} + +enum powercap_get64 { + GET_ENERGY_UJ, + GET_MAX_ENERGY_RANGE_UJ, + GET_POWER_UW, + GET_MAX_POWER_RANGE_UW, + MAX_GET_64_FILES +}; + +static const char *powercap_get64_files[MAX_GET_64_FILES] = { + [GET_POWER_UW] = "power_uw", + [GET_MAX_POWER_RANGE_UW] = "max_power_range_uw", + [GET_ENERGY_UJ] = "energy_uj", + [GET_MAX_ENERGY_RANGE_UJ] = "max_energy_range_uj", +}; + +static int sysfs_powercap_get64_val(struct powercap_zone *zone, + enum powercap_get64 which, + uint64_t *val) +{ + char file[SYSFS_PATH_MAX] = PATH_TO_POWERCAP "/"; + int ret; + char buf[MAX_LINE_LEN]; + + strcat(file, zone->sys_name); + strcat(file, "/"); + strcat(file, powercap_get64_files[which]); + + ret = sysfs_read_file(file, buf, MAX_LINE_LEN); + if (ret < 0) + return ret; + if (ret == 0) + return -1; + + *val = strtoll(buf, NULL, 10); + return 0; +} + +int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val) +{ + return sysfs_powercap_get64_val(zone, GET_MAX_ENERGY_RANGE_UJ, val); +} + +int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val) +{ + return sysfs_powercap_get64_val(zone, GET_ENERGY_UJ, val); +} + +int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val) +{ + return sysfs_powercap_get64_val(zone, GET_MAX_POWER_RANGE_UW, val); +} + +int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val) +{ + return sysfs_powercap_get64_val(zone, GET_POWER_UW, val); +} + +int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode) +{ + char path[SYSFS_PATH_MAX] = PATH_TO_POWERCAP; + + if ((strlen(PATH_TO_POWERCAP) + strlen(zone->sys_name)) + + strlen("/enabled") + 1 >= SYSFS_PATH_MAX) + return -1; + + strcat(path, "/"); + strcat(path, zone->sys_name); + strcat(path, "/enabled"); + + return sysfs_get_enabled(path, mode); +} + +int powercap_zone_set_enabled(struct powercap_zone *zone, int mode) +{ + /* To be done if needed */ + return 0; +} + + +int powercap_read_zone(struct powercap_zone *zone) +{ + struct dirent *dent; + DIR *zone_dir; + char sysfs_dir[SYSFS_PATH_MAX] = PATH_TO_POWERCAP; + struct powercap_zone *child_zone; + char file[SYSFS_PATH_MAX] = PATH_TO_POWERCAP; + int i, ret = 0; + uint64_t val = 0; + + strcat(sysfs_dir, "/"); + strcat(sysfs_dir, zone->sys_name); + + zone_dir = opendir(sysfs_dir); + if (zone_dir == NULL) + return -1; + + strcat(file, "/"); + strcat(file, zone->sys_name); + strcat(file, "/name"); + sysfs_read_file(file, zone->name, MAX_LINE_LEN); + if (zone->parent) + zone->tree_depth = zone->parent->tree_depth + 1; + ret = powercap_get_energy_uj(zone, &val); + if (ret == 0) + zone->has_energy_uj = 1; + ret = powercap_get_power_uw(zone, &val); + if (ret == 0) + zone->has_power_uw = 1; + + while ((dent = readdir(zone_dir)) != NULL) { + struct stat st; + + if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) + continue; + + if (stat(dent->d_name, &st) != 0 || !S_ISDIR(st.st_mode)) + if (fstatat(dirfd(zone_dir), dent->d_name, &st, 0) < 0) + continue; + + if (strncmp(dent->d_name, "intel-rapl:", 11) != 0) + continue; + + child_zone = calloc(1, sizeof(struct powercap_zone)); + if (child_zone == NULL) + return -1; + for (i = 0; i < POWERCAP_MAX_CHILD_ZONES; i++) { + if (zone->children[i] == NULL) { + zone->children[i] = child_zone; + break; + } + if (i == POWERCAP_MAX_CHILD_ZONES - 1) { + free(child_zone); + fprintf(stderr, "Reached POWERCAP_MAX_CHILD_ZONES %d\n", + POWERCAP_MAX_CHILD_ZONES); + return -1; + } + } + strcpy(child_zone->sys_name, zone->sys_name); + strcat(child_zone->sys_name, "/"); + strcat(child_zone->sys_name, dent->d_name); + child_zone->parent = zone; + if (zone->tree_depth >= POWERCAP_MAX_TREE_DEPTH) { + fprintf(stderr, "Maximum zone hierarchy depth[%d] reached\n", + POWERCAP_MAX_TREE_DEPTH); + ret = -1; + break; + } + powercap_read_zone(child_zone); + } + closedir(zone_dir); + return ret; +} + +struct powercap_zone *powercap_init_zones(void) +{ + int enabled; + struct powercap_zone *root_zone; + int ret; + char file[SYSFS_PATH_MAX] = PATH_TO_RAPL "/enabled"; + + ret = sysfs_get_enabled(file, &enabled); + + if (ret) + return NULL; + + if (!enabled) + return NULL; + + root_zone = calloc(1, sizeof(struct powercap_zone)); + if (!root_zone) + return NULL; + + strcpy(root_zone->sys_name, "intel-rapl/intel-rapl:0"); + + powercap_read_zone(root_zone); + + return root_zone; +} + +/* Call function *f on the passed zone and all its children */ + +int powercap_walk_zones(struct powercap_zone *zone, + int (*f)(struct powercap_zone *zone)) +{ + int i, ret; + + if (!zone) + return -1; + + ret = f(zone); + if (ret) + return ret; + + for (i = 0; i < POWERCAP_MAX_CHILD_ZONES; i++) { + if (zone->children[i] != NULL) + powercap_walk_zones(zone->children[i], f); + } + return 0; +} diff --git a/tools/power/cpupower/lib/powercap.h b/tools/power/cpupower/lib/powercap.h new file mode 100644 index 000000000000..c049c109f22f --- /dev/null +++ b/tools/power/cpupower/lib/powercap.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * (C) 2016 SUSE Software Solutions GmbH + * Thomas Renninger <trenn@suse.de> + */ + +#ifndef __CPUPOWER_RAPL_H__ +#define __CPUPOWER_RAPL_H__ + +#define PATH_TO_POWERCAP "/sys/devices/virtual/powercap" +#define PATH_TO_RAPL "/sys/devices/virtual/powercap/intel-rapl" +#define PATH_TO_RAPL_CLASS "/sys/devices/virtual/powercap/intel-rapl" + +#define POWERCAP_MAX_CHILD_ZONES 10 +#define POWERCAP_MAX_TREE_DEPTH 10 + +#define MAX_LINE_LEN 4096 +#define SYSFS_PATH_MAX 255 + +#include <stdint.h> + +struct powercap_zone { + char name[MAX_LINE_LEN]; + /* + * sys_name relative to PATH_TO_POWERCAP, + * do not forget the / in between + */ + char sys_name[SYSFS_PATH_MAX]; + int tree_depth; + struct powercap_zone *parent; + struct powercap_zone *children[POWERCAP_MAX_CHILD_ZONES]; + /* More possible caps or attributes to be added? */ + uint32_t has_power_uw:1, + has_energy_uj:1; + +}; + +int powercap_walk_zones(struct powercap_zone *zone, + int (*f)(struct powercap_zone *zone)); + +struct powercap_zone *powercap_init_zones(void); +int powercap_get_enabled(int *mode); +int powercap_set_enabled(int mode); +int powercap_get_driver(char *driver, int buflen); + +int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val); +int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val); +int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val); +int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val); +int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode); +int powercap_zone_set_enabled(struct powercap_zone *zone, int mode); + + +#endif /* __CPUPOWER_RAPL_H__ */ diff --git a/tools/power/cpupower/man/cpupower-powercap-info.1 b/tools/power/cpupower/man/cpupower-powercap-info.1 new file mode 100644 index 000000000000..df3087000efb --- /dev/null +++ b/tools/power/cpupower/man/cpupower-powercap-info.1 @@ -0,0 +1,25 @@ +.TH CPUPOWER\-POWERCAP\-INFO "1" "05/08/2016" "" "cpupower Manual" +.SH NAME +cpupower\-powercap\-info \- Shows powercapping related kernel and hardware configurations +.SH SYNOPSIS +.ft B +.B cpupower powercap-info + +.SH DESCRIPTION +\fBcpupower powercap-info \fP shows kernel powercapping subsystem information. +This needs hardware support and a loaded powercapping driver (at this time only +intel_rapl driver exits) exporting hardware values userspace via sysfs. + +Some options are platform wide, some affect single cores. By default values +of core zero are displayed only. cpupower --cpu all cpuinfo will show the +settings of all cores, see cpupower(1) how to choose specific cores. + +.SH "DOCUMENTATION" + +kernel sources: +Documentation/power/powercap/powercap.txt + + +.SH "SEE ALSO" + +cpupower(1) diff --git a/tools/power/cpupower/po/ka.po b/tools/power/cpupower/po/ka.po new file mode 100644 index 000000000000..ef71dbac5a13 --- /dev/null +++ b/tools/power/cpupower/po/ka.po @@ -0,0 +1,983 @@ +# Georgian translation for cpufrequtils package +# Georgian messages for cpufrequtils. +# Copyright (C) 2004-2022 Dominik Brodowski <linux@dominikbrodowski.net> +# This file is distributed under the same license as the cpufrequtils package. +# Ekaterine Papava <katopapava@gmail.com>, 2022. + +msgid "" +msgstr "" +"Project-Id-Version: cpufrequtils 006\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2011-03-08 17:03+0100\n" +"PO-Revision-Date: 2022-09-18 22:12+0200\n" +"Last-Translator: Ekaterine Papava <katopapava@gmail.com>\n" +"Language-Team: NONE\n" +"Language: ka\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"X-Generator: Poedit 3.1.1\n" + +#: utils/idle_monitor/nhm_idle.c:36 +msgid "Processor Core C3" +msgstr "პროცესორის ბირთვი C3" + +#: utils/idle_monitor/nhm_idle.c:43 +msgid "Processor Core C6" +msgstr "პროცესორის ბირთვი C6" + +#: utils/idle_monitor/nhm_idle.c:51 +msgid "Processor Package C3" +msgstr "პროცესორის პაკეტი C3" + +#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70 +msgid "Processor Package C6" +msgstr "პროცესორის პაკეტი C6" + +#: utils/idle_monitor/snb_idle.c:33 +msgid "Processor Core C7" +msgstr "პროცესორის Core C7" + +#: utils/idle_monitor/snb_idle.c:40 +msgid "Processor Package C2" +msgstr "პროცესორის პაკეტი C2" + +#: utils/idle_monitor/snb_idle.c:47 +msgid "Processor Package C7" +msgstr "პროცესორის პაკეტი C7" + +#: utils/idle_monitor/amd_fam14h_idle.c:56 +msgid "Package in sleep state (PC1 or deeper)" +msgstr "პაკეტი ძილის მდგომარეობაში (PC1 ან ღრმა)" + +#: utils/idle_monitor/amd_fam14h_idle.c:63 +msgid "Processor Package C1" +msgstr "პროცესორის პაკეტი C1" + +#: utils/idle_monitor/amd_fam14h_idle.c:77 +msgid "North Bridge P1 boolean counter (returns 0 or 1)" +msgstr "ჩრდილო ხიდის P1 ლოგიკური მთვლელი (აბრუნებს 0 ან 1-ს)" + +#: utils/idle_monitor/mperf_monitor.c:35 +msgid "Processor Core not idle" +msgstr "პროცესორის ბირთვი უქმი არაა" + +#: utils/idle_monitor/mperf_monitor.c:42 +msgid "Processor Core in an idle state" +msgstr "პროცესორის ბირთვი უქმ მდგომარეობაში არაა" + +#: utils/idle_monitor/mperf_monitor.c:50 +msgid "Average Frequency (including boost) in MHz" +msgstr "საშუალო სიხშირე (პიკურის ჩათვლით) მეგაჰერცებში" + +#: utils/idle_monitor/cpupower-monitor.c:66 +#, c-format +msgid "" +"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command ...]\n" +msgstr "" +"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"ინტერვალი_წმ | -c ბრძანება ...]\n" + +#: utils/idle_monitor/cpupower-monitor.c:69 +#, c-format +msgid "" +"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command ...]\n" +msgstr "" +"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"ინტერვალი_წმ | -c ბრძანება ...]\n" + +#: utils/idle_monitor/cpupower-monitor.c:71 +#, c-format +msgid "\t -v: be more verbose\n" +msgstr "\t -v: დამატებითი ინფორმაციის გამოტანა\n" + +#: utils/idle_monitor/cpupower-monitor.c:73 +#, c-format +msgid "\t -h: print this help\n" +msgstr "\t -h: ამ დახმარების გამოტანა\n" + +#: utils/idle_monitor/cpupower-monitor.c:74 +#, c-format +msgid "\t -i: time interval to measure for in seconds (default 1)\n" +msgstr "" +"\t -i: გასაზომი დროის ინტერვალი, წამებში (ნაგულისხმები მნიშვნელობაა 1)\n" + +#: utils/idle_monitor/cpupower-monitor.c:75 +#, c-format +msgid "\t -t: show CPU topology/hierarchy\n" +msgstr "\t -t: CPU -ის ტოპოლოგიის/იერარქიის ჩვენება\n" + +#: utils/idle_monitor/cpupower-monitor.c:76 +#, c-format +msgid "\t -l: list available CPU sleep monitors (for use with -m)\n" +msgstr "" +"\t -l: CPU-ის ძილის მონიტორების სიის გამოტანა (განკუთვნილია -m -სთან ერთად " +"გამოსაყენებლად)\n" + +#: utils/idle_monitor/cpupower-monitor.c:77 +#, c-format +msgid "\t -m: show specific CPU sleep monitors only (in same order)\n" +msgstr "" +"\t -m: მხოლოდ მითითებული CPU-ის ძილის მონიტორების ჩვენება (იგივე " +"მიმდევრობით)\n" + +#: utils/idle_monitor/cpupower-monitor.c:79 +#, c-format +msgid "" +"only one of: -t, -l, -m are allowed\n" +"If none of them is passed," +msgstr "" +"დასაშვებია მხოლოდ ერთ-ერთის: -t, -l ან -m მითითება\n" +"თუ მითითებული არც ერთი არაა," + +#: utils/idle_monitor/cpupower-monitor.c:80 +#, c-format +msgid " all supported monitors are shown\n" +msgstr " ნაჩვენები იქნება ყველა მხარდაჭერილი მონიტორი\n" + +#: utils/idle_monitor/cpupower-monitor.c:197 +#, c-format +msgid "Monitor %s, Counter %s has no count function. Implementation error\n" +msgstr "" +"მონიტორი %s, მთვლელი %s. დათვლის ფუნქცია არ გააჩნია. განხორციელების შეცდომა\n" + +#: utils/idle_monitor/cpupower-monitor.c:207 +#, c-format +msgid " *is offline\n" +msgstr " *გათიშულია\n" + +#: utils/idle_monitor/cpupower-monitor.c:236 +#, c-format +msgid "%s: max monitor name length (%d) exceeded\n" +msgstr "%s: მონიტორის სახელის მაქსიმალური სიგრძე (%d) გადაჭარბებულია\n" + +#: utils/idle_monitor/cpupower-monitor.c:250 +#, c-format +msgid "No matching monitor found in %s, try -l option\n" +msgstr "%s-ში აღწერილი მონიტორი ვერ ვიპოვე. სცადეთ -l პარამეტრი\n" + +#: utils/idle_monitor/cpupower-monitor.c:266 +#, c-format +msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n" +msgstr "მონიტორი \"%s\" (%d მდგომარეობა) - გადაივსება %u წამის შემდეგ\n" + +#: utils/idle_monitor/cpupower-monitor.c:319 +#, c-format +msgid "%s took %.5f seconds and exited with status %d\n" +msgstr "%s-ს %.5f წამი დასჭირდა და მუშაობა სტატუსით %d დაასრულა\n" + +#: utils/idle_monitor/cpupower-monitor.c:406 +#, c-format +msgid "Cannot read number of available processors\n" +msgstr "ხელმისაწვდომი პროცესორების რიცხვის წაკითხვა შეუძლებელია\n" + +#: utils/idle_monitor/cpupower-monitor.c:417 +#, c-format +msgid "Available monitor %s needs root access\n" +msgstr "ხელმისაწვდომ მონიტორს (%s) root-ის წვდომს სჭირდება\n" + +#: utils/idle_monitor/cpupower-monitor.c:428 +#, c-format +msgid "No HW Cstate monitors found\n" +msgstr "აპარატურული C-მდგომარეობის მონიტორები ვერ ვიპოვე\n" + +#: utils/cpupower.c:78 +#, c-format +msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n" +msgstr "cpupower [ -c cpu-ებისსია ] ქვებრძანება [არგუმენტები]\n" + +#: utils/cpupower.c:79 +#, c-format +msgid "cpupower --version\n" +msgstr "cpupower --version\n" + +#: utils/cpupower.c:80 +#, c-format +msgid "Supported subcommands are:\n" +msgstr "ხელმისაწვდომი ქვებრძანებებია:\n" + +#: utils/cpupower.c:83 +#, c-format +msgid "" +"\n" +"Some subcommands can make use of the -c cpulist option.\n" +msgstr "" +"\n" +"ზოგიერთ ქვებრძანებას შეუძლია -c cpu-ებისსია პარამეტრი გამოიყენოს.\n" + +#: utils/cpupower.c:84 +#, c-format +msgid "Look at the general cpupower manpage how to use it\n" +msgstr "" +"გამოყენების ინსტრუქციისთვის cpupower-ის სახელმძღვანელოში (manpage) ჩაიხედეთ\n" + +#: utils/cpupower.c:85 +#, c-format +msgid "and read up the subcommand's manpage whether it is supported.\n" +msgstr "" +"და წაიკითხეთ ქვებრძანების სახელმძღვანელო (manpage), თუ ის მხარდაჭერილია.\n" + +#: utils/cpupower.c:86 +#, c-format +msgid "" +"\n" +"Use cpupower help subcommand for getting help for above subcommands.\n" +msgstr "" +"\n" +"ზემოთ მოყვანილი ქვებრძანებების შესახებ ინფორმაციის მისაღებად გამოიყენეთ " +"cpupower help ქვებრძანების_სახელი.\n" + +#: utils/cpupower.c:91 +#, c-format +msgid "Report errors and bugs to %s, please.\n" +msgstr "გთხოვთ, შეცდომების შესახებ გვაცნობეთ აქ; %s.\n" + +#: utils/cpupower.c:114 +#, c-format +msgid "Error parsing cpu list\n" +msgstr "CPU-ების სიის დამუშავების შეცდომა\n" + +#: utils/cpupower.c:172 +#, c-format +msgid "Subcommand %s needs root privileges\n" +msgstr "ქვებრძანებას %s root-ის პრივილეგიები სჭირდება\n" + +#: utils/cpufreq-info.c:31 +#, c-format +msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n" +msgstr "" +"CPU-ების (%s: %s) რაოდენობის მიღების შეცდომა. ჩაითვლება, რომ უდრის 1-ს\n" + +#: utils/cpufreq-info.c:63 +#, c-format +msgid "" +" minimum CPU frequency - maximum CPU frequency - governor\n" +msgstr "" +" CPU-ის მინიმალური სიხშირე - CPU-ის მაქსიმალური სიხშირე - " +"მმართველი\n" + +#: utils/cpufreq-info.c:151 +#, c-format +msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n" +msgstr "" +"შეცდომა %d-ე CPU-ის პიკური დატვირთვის მართვის შესაძლებლობების შეფასებისას -- " +"გაქვთ თუ არა root პრივილეგიები?\n" + +#. P state changes via MSR are identified via cpuid 80000007 +#. on Intel and AMD, but we assume boost capable machines can do that +#. if (cpuid_eax(0x80000000) >= 0x80000007 +#. && (cpuid_edx(0x80000007) & (1 << 7))) +#. +#: utils/cpufreq-info.c:161 +#, c-format +msgid " boost state support: \n" +msgstr " პიკის მდგომარეობის მხარდაჭერა: \n" + +#: utils/cpufreq-info.c:163 +#, c-format +msgid " Supported: %s\n" +msgstr " მხარდაჭერილია: %s\n" + +#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 +msgid "yes" +msgstr "დიახ" + +#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 +msgid "no" +msgstr "არა" + +#: utils/cpufreq-info.c:164 +#, c-format +msgid " Active: %s\n" +msgstr " აქტიური: %s\n" + +#: utils/cpufreq-info.c:177 +#, c-format +msgid " Boost States: %d\n" +msgstr " პიკის მდგომარეობები: %d\n" + +#: utils/cpufreq-info.c:178 +#, c-format +msgid " Total States: %d\n" +msgstr " სულ მდგომარეობები: %d\n" + +#: utils/cpufreq-info.c:181 +#, c-format +msgid " Pstate-Pb%d: %luMHz (boost state)\n" +msgstr " Pstate-Pb%d: %luმჰც (პიკში)\n" + +#: utils/cpufreq-info.c:184 +#, c-format +msgid " Pstate-P%d: %luMHz\n" +msgstr " Pstate-P%d: %luმჰც\n" + +#: utils/cpufreq-info.c:211 +#, c-format +msgid " no or unknown cpufreq driver is active on this CPU\n" +msgstr " ამ CPU-ზე cpufreq-ის დრაივერი უცნობია, ან არ არსებობს\n" + +#: utils/cpufreq-info.c:213 +#, c-format +msgid " driver: %s\n" +msgstr " დრაივერი: %s\n" + +#: utils/cpufreq-info.c:219 +#, c-format +msgid " CPUs which run at the same hardware frequency: " +msgstr " CPU-ები, რომლებიც ერთი და იგივე აპარატურულ სიხშირეზე მუშაობენ: " + +#: utils/cpufreq-info.c:230 +#, c-format +msgid " CPUs which need to have their frequency coordinated by software: " +msgstr " CPU-ები, რომლებსაც მათი სიხშირის პროგრამული კოორდინაცია სჭირდებათ: " + +#: utils/cpufreq-info.c:241 +#, c-format +msgid " maximum transition latency: " +msgstr " მაქსიმალური გარდამავალი დაყოვნება: " + +#: utils/cpufreq-info.c:247 +#, c-format +msgid " hardware limits: " +msgstr " აპარატურული ლიმიტები: " + +#: utils/cpufreq-info.c:256 +#, c-format +msgid " available frequency steps: " +msgstr " ხელმისაწვდომი სიხშირის ბიჯები: " + +#: utils/cpufreq-info.c:269 +#, c-format +msgid " available cpufreq governors: " +msgstr " cpufreq -ის ხელმისაწვდომი მმართველები: " + +#: utils/cpufreq-info.c:280 +#, c-format +msgid " current policy: frequency should be within " +msgstr " მიმდინარე პოლიტიკა: სიხშირის დიაპაზონია " + +#: utils/cpufreq-info.c:282 +#, c-format +msgid " and " +msgstr " და " + +#: utils/cpufreq-info.c:286 +#, c-format +msgid "" +"The governor \"%s\" may decide which speed to use\n" +" within this range.\n" +msgstr "" +"მმართველს \"%s\" შეუძლია გადაწყვიტოს, რომელი სიჩქარე გამოიყენოს\n" +" ამ დიაპაზონიდან.\n" + +#: utils/cpufreq-info.c:293 +#, c-format +msgid " current CPU frequency is " +msgstr " CPU-ის მიმდინარე სიხშირეა " + +#: utils/cpufreq-info.c:296 +#, c-format +msgid " (asserted by call to hardware)" +msgstr " (დამტკიცებულია აპარატურული გადამოწმებით)" + +#: utils/cpufreq-info.c:304 +#, c-format +msgid " cpufreq stats: " +msgstr " cpufreq -ის სტატისტიკა: " + +#: utils/cpufreq-info.c:472 +#, c-format +msgid "Usage: cpupower freqinfo [options]\n" +msgstr "გამოყენება: cpupower freqinfo [პარამეტრები]\n" + +#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23 +#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148 +#, c-format +msgid "Options:\n" +msgstr "პარამეტრები:\n" + +#: utils/cpufreq-info.c:474 +#, c-format +msgid " -e, --debug Prints out debug information [default]\n" +msgstr " -e, --debug გამართვის ინფორმაციის ჩვენება [ნაგულისხმები]\n" + +#: utils/cpufreq-info.c:475 +#, c-format +msgid "" +" -f, --freq Get frequency the CPU currently runs at, according\n" +" to the cpufreq core *\n" +msgstr "" +" -f, --freq სიხშირის მიღება, რომლითაც CPU ამჟამად მუშაობს, \n" +" cpufreq ბირთვის შესაბამისად *\n" + +#: utils/cpufreq-info.c:477 +#, c-format +msgid "" +" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n" +" it from hardware (only available to root) *\n" +msgstr "" +" -w, --hwfreq სიხშირის მიღება, რომლითაც CPU ახლა მუშაობს, " +"მნიშვნელობის\n" +" პირდაპირ აპარატურიდან წაკითხვით (საჭიროა root-ის " +"პრივილეგიები) *\n" + +#: utils/cpufreq-info.c:479 +#, c-format +msgid "" +" -l, --hwlimits Determine the minimum and maximum CPU frequency " +"allowed *\n" +msgstr "" +" -l, --hwlimits CPU-ის მინიმალური და მაქსიმალური დასაშვები სიხშირის " +"განსაზღვრა *\n" + +#: utils/cpufreq-info.c:480 +#, c-format +msgid " -d, --driver Determines the used cpufreq kernel driver *\n" +msgstr "" +" -d, --driver ბირთვის მიერ გამოყენებული cpufreq -ის დრაივერი *\n" + +#: utils/cpufreq-info.c:481 +#, c-format +msgid " -p, --policy Gets the currently used cpufreq policy *\n" +msgstr "" +" -p, --policy cpufreq -ის ამჟამად გამოყენებული პოლიტიკის მიღება*\n" + +#: utils/cpufreq-info.c:482 +#, c-format +msgid " -g, --governors Determines available cpufreq governors *\n" +msgstr "" +" -g, --governors cpufreq-ის ხელმისაწვდომი მმართველების დადგენა *\n" + +#: utils/cpufreq-info.c:483 +#, c-format +msgid "" +" -r, --related-cpus Determines which CPUs run at the same hardware " +"frequency *\n" +msgstr "" +" -r, --related-cpus განსაზღვრავს, რომელი CPU-ები მუშაობს ერთი და იგივე " +"აპარატურულ სიხშირეზე *\n" + +#: utils/cpufreq-info.c:484 +#, c-format +msgid "" +" -a, --affected-cpus Determines which CPUs need to have their frequency\n" +" coordinated by software *\n" +msgstr "" +" -a, --affected-cpus განსაზღვრავს, რომელი CPU-ებს სჭირდებათ მათი სიხშირის\n" +" პროგრამული კოორდინაცია *\n" + +#: utils/cpufreq-info.c:486 +#, c-format +msgid " -s, --stats Shows cpufreq statistics if available\n" +msgstr "" +" -s, --stats cpufreq -ის სტატისტიკის ჩვენება, თუ ის " +"ხელმისაწვდომია\n" + +#: utils/cpufreq-info.c:487 +#, c-format +msgid "" +" -y, --latency Determines the maximum latency on CPU frequency " +"changes *\n" +msgstr "" +" -y, --latency CPU -ის სიხშირის ცვლილების მაქსიმალური დაყოვნების " +"დადგენა *\n" + +#: utils/cpufreq-info.c:488 +#, c-format +msgid " -b, --boost Checks for turbo or boost modes *\n" +msgstr " -b, --boost ტურბო და პიკური რეჟიმების შემოწმება *\n" + +#: utils/cpufreq-info.c:489 +#, c-format +msgid "" +" -o, --proc Prints out information like provided by the /proc/" +"cpufreq\n" +" interface in 2.4. and early 2.6. kernels\n" +msgstr "" +" -o, --proc გამოიტანს ინფორმაციას, რომელიც /proc/cpufreq-ის " +"მიერაა მოწოდებული.\n" +" ეს ინტერფეისი 2.4. და ადრეულ 2.6. ბირთვებში იყო " +"ხელმისაწვდომი\n" + +#: utils/cpufreq-info.c:491 +#, c-format +msgid "" +" -m, --human human-readable output for the -f, -w, -s and -y " +"parameters\n" +msgstr "" +" -m, --human -f, -w, -s და -y პარამეტრების გამოტანის " +"ადამიანისთვის გასაგებ ფორმატში ჩვენება\n" + +#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152 +#, c-format +msgid " -h, --help Prints out this screen\n" +msgstr " -h, --help ამ ეკრანის გამოტანა\n" + +#: utils/cpufreq-info.c:495 +#, c-format +msgid "" +"If no argument or only the -c, --cpu parameter is given, debug output about\n" +"cpufreq is printed which is useful e.g. for reporting bugs.\n" +msgstr "" +"თუ არგუმენტები საერთოდ არ გადაცემულა ან გადაცემულია -c ან --cpu, მოხდება " +"cpufreq-ის\n" +"დრაივერის დამატებითი შეტყობინებების გამოტანა, რომელიც გამართვისთვისაა " +"საჭირო.\n" + +#: utils/cpufreq-info.c:497 +#, c-format +msgid "" +"For the arguments marked with *, omitting the -c or --cpu argument is\n" +"equivalent to setting it to zero\n" +msgstr "" +"არგუმენტებისთვის, რომლებიც *-ით არიან მონიშნულები, -c/--cpu \n" +"არგუმენტის გამოტოვება მის ნულოვან მნიშვნელობაზე დაყენებას უდრის\n" + +#: utils/cpufreq-info.c:580 +#, c-format +msgid "" +"The argument passed to this tool can't be combined with passing a --cpu " +"argument\n" +msgstr "" +"ამ პროგრამისთვის გადაცემული არგუმენტის --cpu არგუმენტთან გადაცემა ერთად არ " +"შეიძლება\n" + +#: utils/cpufreq-info.c:596 +#, c-format +msgid "" +"You can't specify more than one --cpu parameter and/or\n" +"more than one output-specific argument\n" +msgstr "" +"ერთ --cpu პარამეტრზე ან/და ერთ გამოტანის-შესატყვისი \n" +"არგუმენტზე მეტის მითითება აკრძალულია\n" + +#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42 +#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213 +#, c-format +msgid "invalid or unknown argument\n" +msgstr "არასწორი ან უცნობი არგუმენტი\n" + +#: utils/cpufreq-info.c:617 +#, c-format +msgid "couldn't analyze CPU %d as it doesn't seem to be present\n" +msgstr "%d-ე CPU-ის ანალიზი შეუძლებელია. ის არ არსებობს\n" + +#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142 +#, c-format +msgid "analyzing CPU %d:\n" +msgstr "%d-ე CPU -ის ანალიზი:\n" + +#: utils/cpufreq-set.c:25 +#, c-format +msgid "Usage: cpupower frequency-set [options]\n" +msgstr "გამოყენება: cpupower frequency-set [პარამეტრები]\n" + +#: utils/cpufreq-set.c:27 +#, c-format +msgid "" +" -d FREQ, --min FREQ new minimum CPU frequency the governor may " +"select\n" +msgstr "" +" -d FREQ, --min FREQ CPU-ის ახალი მინიმალური სიხშირე, რომელიც " +"მმართველს შეუძლია, აირჩიოს\n" + +#: utils/cpufreq-set.c:28 +#, c-format +msgid "" +" -u FREQ, --max FREQ new maximum CPU frequency the governor may " +"select\n" +msgstr "" +" -u FREQ, --max FREQ CPU-ის ახალი მაქსიმალური სიხშირე, რომელიც " +"მმართველს შეუძლია, აირჩიოს\n" + +#: utils/cpufreq-set.c:29 +#, c-format +msgid " -g GOV, --governor GOV new cpufreq governor\n" +msgstr " -g GOV, --governor GOV cpufreq-ის ახალი მმართველი\n" + +#: utils/cpufreq-set.c:30 +#, c-format +msgid "" +" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n" +" governor to be available and loaded\n" +msgstr "" +" -f FREQ, --freq FREQ მითითებული სიხშირის დაყენება. საჭიროა, " +"მომხმარებლის სივრცეში გაშვებული\n" +" მმართველი ხელმისაწვდომი და ჩატვირთული იყოს\n" + +#: utils/cpufreq-set.c:32 +#, c-format +msgid " -r, --related Switches all hardware-related CPUs\n" +msgstr "" +" -r, --related ყველა აპარატურულად-დაავშირებული CPU-ის გადართვა\n" + +#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27 +#, c-format +msgid " -h, --help Prints out this screen\n" +msgstr " -h, --help ამ ეკრანის გამოტანა\n" + +#: utils/cpufreq-set.c:35 +#, c-format +msgid "" +"Notes:\n" +"1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n" +msgstr "" +"შენიშვნა:\n" +"1. -c/--cpu პარამეტრის გამოტოვება იგივეა, რაც \"all\" (ყველას) მითითება\n" + +#: utils/cpufreq-set.c:37 +#, c-format +msgid "" +"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other " +"parameter\n" +" except the -c CPU, --cpu CPU parameter\n" +"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n" +" by postfixing the value with the wanted unit name, without any space\n" +" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n" +msgstr "" +"2. The -f FREQ, --freq FREQ პარამეტრის შეთავსება შეუძლებელია სხვა " +"პარამეტრებთან,\n" +" -c CPU, --cpu CPU -ის გარდა\n" +"3. სიხშირეების გადაცემა შეიძლება ჰერცებში (hz), კილოჰერცებში, (KHz) " +"(ნაგულისხმები), მეგაჰერცებში (MHz), GHz და THz.\n" +" რიცხვის შემდეგ შესაბამისი ერთეულის, გამოტოვების გარეშე, მიწერით\n" +" (სიხშირე in kHz(კილოჰერცი) =^ Hz(ჰერცი) * 0.001 =^ MHz(მეგაჰერცი) * 1000 " +"=^ GHz(გიგაჰერცი) * 1000000).\n" + +#: utils/cpufreq-set.c:57 +#, c-format +msgid "" +"Error setting new values. Common errors:\n" +"- Do you have proper administration rights? (super-user?)\n" +"- Is the governor you requested available and modprobed?\n" +"- Trying to set an invalid policy?\n" +"- Trying to set a specific frequency, but userspace governor is not " +"available,\n" +" for example because of hardware which cannot be set to a specific " +"frequency\n" +" or because the userspace governor isn't loaded?\n" +msgstr "" +"ახალი მნიშვნელოების დაყენება შეუძლებელია. ხშირად დაშვებული შეცდომებია:\n" +"- გაქვთ თუ არა ადმინისტრატორის უფლებები? (მომხმარებელი root)\n" +"- არის თუ არა მოთხოვნილი მმართველი ხელმისაწვდომი და შესაბამისი მოდული " +"modprobe-ით ჩატვირთული?\n" +"- ცდილობთ დააყენოთ არასწორი პოლიტიკა?\n" +"- ცდილობთ დააყენოთ განსაზღვრული სიხშირე მაშინ, როცა მომხმარებლის სივრცის " +"მმართველი ხელმიუწვდომელია.\n" +" მაგალითად აპარატურის გამო, რომელსაც მითითებული სიხშირის დაყენება არ " +"შეუძლია,\n" +" ან იქნებ მომხმარებლის სივრცის მმართველი ჩატვირთული არაა?\n" + +#: utils/cpufreq-set.c:170 +#, c-format +msgid "wrong, unknown or unhandled CPU?\n" +msgstr "არასწორი, უცნობი ან არასასურველი CPU?\n" + +#: utils/cpufreq-set.c:302 +#, c-format +msgid "" +"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n" +"-g/--governor parameters\n" +msgstr "" +"პარამეტრი -f/--freq არ შეიძლება-d/--min, -u/--max და\n" +"-g/--governor პარამეტრებთან ერთად იყოს მითითებული\n" + +#: utils/cpufreq-set.c:308 +#, c-format +msgid "" +"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n" +"-g/--governor must be passed\n" +msgstr "" +"საჭიროა -f/--freq, -d/--min, -u/--max, and\n" +"-g/--governor პარამეტრებიდან ერთის გადაცემა მაინც\n" + +#: utils/cpufreq-set.c:347 +#, c-format +msgid "Setting cpu: %d\n" +msgstr "CPU-ის დაყენება: %d\n" + +#: utils/cpupower-set.c:22 +#, c-format +msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n" +msgstr "გამოყენება: cpupower set [ -b მნიშვნ ] [ -m მნიშვნ ] [ -s მნიშვნ ]\n" + +#: utils/cpupower-set.c:24 +#, c-format +msgid "" +" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n" +" Intel models [0-15], see manpage for details\n" +msgstr "" +" -b, --perf-bias [VAL] Intel-ის ზოგიერთ მოდელზე [0-15] CPU-ის კვებასა და " +"წარმადობას შორის დამოკიდებულების დაყენება\n" +" მეტი დეტალისთვის იხილეთ სახელმძღვანელო (manpage)\n" + +#: utils/cpupower-set.c:26 +#, c-format +msgid "" +" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n" +msgstr "" +" -m, --sched-mc [VAL] ბირთვის მრავალბირთვიანობის მგეგმავის პოლიტიკის " +"დაყენება.\n" + +#: utils/cpupower-set.c:27 +#, c-format +msgid "" +" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler " +"policy.\n" +msgstr "" +" -s, --sched-smt [VAL] ბირთვის ნაკადის დის მგეგმავის პოლიტიკის " +"დაყენება.\n" + +#: utils/cpupower-set.c:80 +#, c-format +msgid "--perf-bias param out of range [0-%d]\n" +msgstr "--perf-bias პარამეტრი დიაპაზონიდან [0-%d]\n" + +#: utils/cpupower-set.c:91 +#, c-format +msgid "--sched-mc param out of range [0-%d]\n" +msgstr "--sched-mc პარამეტრი დიაპაზონიდან [0-%d]\n" + +#: utils/cpupower-set.c:102 +#, c-format +msgid "--sched-smt param out of range [0-%d]\n" +msgstr "--sched-smt პარამეტრი დიაპაზონიდან [0-%d]\n" + +#: utils/cpupower-set.c:121 +#, c-format +msgid "Error setting sched-mc %s\n" +msgstr "შეცდომა sched-mc -ის დაყენებისას: %s\n" + +#: utils/cpupower-set.c:127 +#, c-format +msgid "Error setting sched-smt %s\n" +msgstr "შეცდომა sched-smt-ის დაყენებისას: %s\n" + +#: utils/cpupower-set.c:146 +#, c-format +msgid "Error setting perf-bias value on CPU %d\n" +msgstr "%d-ე CPU-ზე perf-bias -ის მნიშვნელობის დაყენების შეცდომა\n" + +#: utils/cpupower-info.c:21 +#, c-format +msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n" +msgstr "გამოყენება: cpupower info [ -b ] [ -m ] [ -s ]\n" + +#: utils/cpupower-info.c:23 +#, c-format +msgid "" +" -b, --perf-bias Gets CPU's power vs performance policy on some\n" +" Intel models [0-15], see manpage for details\n" +msgstr "" +" -b, --perf-bias [VAL] Intel-ის ზოგიერთ მოდელზე [0-15] CPU-ის კვებასა და " +"წარმადობას შორის დამოკიდებულების მიღება\n" +" მეტი დეტალისთვის იხილეთ სახელმძღვანელო (manpage)\n" + +#: utils/cpupower-info.c:25 +#, c-format +msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n" +msgstr "" +" -m, --sched-mc ბირთვის მრავალბირთვიანობის მგეგმავის პოლიტიკის მიღება.\n" + +#: utils/cpupower-info.c:26 +#, c-format +msgid "" +" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n" +msgstr " -s, --sched-smt ბირთვის ნაკადის დის მგეგმავის პოლიტიკის მიღება.\n" + +#: utils/cpupower-info.c:28 +#, c-format +msgid "" +"\n" +"Passing no option will show all info, by default only on core 0\n" +msgstr "" +"\n" +"მნიშვნელობის არ-გადაცემის შემთხვევაში ნაჩვენები იქნება სრული ინფორმაცია. " +"ნაგულისხმევად მხოლოდ ნულოვან ბირთვზე\n" + +#: utils/cpupower-info.c:102 +#, c-format +msgid "System's multi core scheduler setting: " +msgstr "სისტემის მრავალბირთვიანობის მმართველის პარამეტრი: " + +#. if sysfs file is missing it's: errno == ENOENT +#: utils/cpupower-info.c:105 utils/cpupower-info.c:114 +#, c-format +msgid "not supported\n" +msgstr "მხარდაუჭერელია\n" + +#: utils/cpupower-info.c:111 +#, c-format +msgid "System's thread sibling scheduler setting: " +msgstr "სისტემის ნაკადის დის მართვის პარამეტრი: " + +#: utils/cpupower-info.c:126 +#, c-format +msgid "Intel's performance bias setting needs root privileges\n" +msgstr "Intel-ის წარმადობის bias-ის დაყენებას root-ის პრივილეგიები სჭირდება\n" + +#: utils/cpupower-info.c:128 +#, c-format +msgid "System does not support Intel's performance bias setting\n" +msgstr "" +"სისტემას intel-ის performance bias-ის დაყენების მხარდაჭერა არ გააჩნია\n" + +#: utils/cpupower-info.c:147 +#, c-format +msgid "Could not read perf-bias value\n" +msgstr "შეცდომა perf-bias -ის მნიშვნელობის წაკითხვისას\n" + +#: utils/cpupower-info.c:150 +#, c-format +msgid "perf-bias: %d\n" +msgstr "perf-bias: %d\n" + +#: utils/cpuidle-info.c:28 +#, c-format +msgid "Analyzing CPU %d:\n" +msgstr "%d-ე CPU -ის ანალიზი:\n" + +#: utils/cpuidle-info.c:32 +#, c-format +msgid "CPU %u: No idle states\n" +msgstr "CPU %u: უქმი მდგომარეობების გარეშე\n" + +#: utils/cpuidle-info.c:36 +#, c-format +msgid "CPU %u: Can't read idle state info\n" +msgstr "CPU %u: უქმი მდგომარეობის ინფორმაციის წაკითხვა შეუძლებელია\n" + +#: utils/cpuidle-info.c:41 +#, c-format +msgid "Could not determine max idle state %u\n" +msgstr "მაქსიმალური უქმე მდგომარეობის %u დადგენის შეცდომა\n" + +#: utils/cpuidle-info.c:46 +#, c-format +msgid "Number of idle states: %d\n" +msgstr "უქმე მდგომარეობების რაოდენობა: %d\n" + +#: utils/cpuidle-info.c:48 +#, c-format +msgid "Available idle states:" +msgstr "ხელმისაწვდომი უქმე მდგომარეობები:" + +#: utils/cpuidle-info.c:71 +#, c-format +msgid "Flags/Description: %s\n" +msgstr "ალმები/აღწერა: %s\n" + +#: utils/cpuidle-info.c:74 +#, c-format +msgid "Latency: %lu\n" +msgstr "დაყოვება: %lu\n" + +#: utils/cpuidle-info.c:76 +#, c-format +msgid "Usage: %lu\n" +msgstr "გამოყენება: %lu\n" + +#: utils/cpuidle-info.c:78 +#, c-format +msgid "Duration: %llu\n" +msgstr "ხანგრძლივობა: %llu\n" + +#: utils/cpuidle-info.c:90 +#, c-format +msgid "Could not determine cpuidle driver\n" +msgstr "CPUidle-ის დრაივერის დადგენის შეცდომა\n" + +#: utils/cpuidle-info.c:94 +#, c-format +msgid "CPUidle driver: %s\n" +msgstr "CPUidle -ის დრაივერი: %s\n" + +#: utils/cpuidle-info.c:99 +#, c-format +msgid "Could not determine cpuidle governor\n" +msgstr "CPUidle-ის მმართველის დადგენის შეცდომა\n" + +#: utils/cpuidle-info.c:103 +#, c-format +msgid "CPUidle governor: %s\n" +msgstr "CPUidle მმართველი: %s\n" + +#: utils/cpuidle-info.c:122 +#, c-format +msgid "CPU %u: Can't read C-state info\n" +msgstr "CPU %u: C-state -ის ინფორმაციის წაკითხვის შეცდომა\n" + +#. printf("Cstates: %d\n", cstates); +#: utils/cpuidle-info.c:127 +#, c-format +msgid "active state: C0\n" +msgstr "აქტიური მდგომარეობა: C0\n" + +#: utils/cpuidle-info.c:128 +#, c-format +msgid "max_cstate: C%u\n" +msgstr "max_cstate: C%u\n" + +#: utils/cpuidle-info.c:129 +#, c-format +msgid "maximum allowed latency: %lu usec\n" +msgstr "მაქსიმალური დასაშვები დაყოვნება: %lu usec\n" + +#: utils/cpuidle-info.c:130 +#, c-format +msgid "states:\t\n" +msgstr "მდგომარეობები:\t\n" + +#: utils/cpuidle-info.c:132 +#, c-format +msgid " C%d: type[C%d] " +msgstr " C%d: ტიპი[C%d] " + +#: utils/cpuidle-info.c:134 +#, c-format +msgid "promotion[--] demotion[--] " +msgstr "promotion[--] demotion[--] " + +#: utils/cpuidle-info.c:135 +#, c-format +msgid "latency[%03lu] " +msgstr "დაყოვნება[%03lu] " + +#: utils/cpuidle-info.c:137 +#, c-format +msgid "usage[%08lu] " +msgstr "გამოყენება[%08lu] " + +#: utils/cpuidle-info.c:139 +#, c-format +msgid "duration[%020Lu] \n" +msgstr "ხანგრძლივობა[%020Lu] \n" + +#: utils/cpuidle-info.c:147 +#, c-format +msgid "Usage: cpupower idleinfo [options]\n" +msgstr "გამოყენება: cpupower idleinfo [პარამეტრები]\n" + +#: utils/cpuidle-info.c:149 +#, c-format +msgid " -s, --silent Only show general C-state information\n" +msgstr " -s, --silent მხოლოდ ზოგადი C-state -ის ინფორმაციის ჩვენება\n" + +#: utils/cpuidle-info.c:150 +#, c-format +msgid "" +" -o, --proc Prints out information like provided by the /proc/" +"acpi/processor/*/power\n" +" interface in older kernels\n" +msgstr "" +" -o, --proc გამოაქვს ინფორმაცია, როგორც ის /proc/acpi/processor/*/" +"power ფაილშია აღწერილი.\n" +" ინტერფეისი ძველ ბირთვებში იყო ხელმისაწვდომი\n" + +#: utils/cpuidle-info.c:209 +#, c-format +msgid "You can't specify more than one output-specific argument\n" +msgstr "ერთზე მეტი გამოტანის-შესატყვისი არგუმენტის მითითება აკრძალულია\n" diff --git a/tools/power/cpupower/utils/builtin.h b/tools/power/cpupower/utils/builtin.h index f7065ae60a14..e1caefd467cd 100644 --- a/tools/power/cpupower/utils/builtin.h +++ b/tools/power/cpupower/utils/builtin.h @@ -8,6 +8,8 @@ extern int cmd_freq_set(int argc, const char **argv); extern int cmd_freq_info(int argc, const char **argv); extern int cmd_idle_set(int argc, const char **argv); extern int cmd_idle_info(int argc, const char **argv); +extern int cmd_cap_info(int argc, const char **argv); +extern int cmd_cap_set(int argc, const char **argv); extern int cmd_monitor(int argc, const char **argv); #endif diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 0646f615fe2d..c96b77365c63 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -572,9 +572,9 @@ int cmd_freq_info(int argc, char **argv) ret = 0; - /* Default is: show output of CPU 0 only */ + /* Default is: show output of base_cpu only */ if (bitmask_isallclear(cpus_chosen)) - bitmask_setbit(cpus_chosen, 0); + bitmask_setbit(cpus_chosen, base_cpu); switch (output_param) { case -1: diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index f2b202c5552a..44126a87fa7a 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -176,9 +176,9 @@ int cmd_idle_info(int argc, char **argv) cpuidle_exit(EXIT_FAILURE); } - /* Default is: show output of CPU 0 only */ + /* Default is: show output of base_cpu only */ if (bitmask_isallclear(cpus_chosen)) - bitmask_setbit(cpus_chosen, 0); + bitmask_setbit(cpus_chosen, base_cpu); if (output_param == 0) cpuidle_general_output(); diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 06345b543786..18fd7751f509 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -67,9 +67,9 @@ int cmd_info(int argc, char **argv) if (!params.params) params.params = 0x7; - /* Default is: show output of CPU 0 only */ + /* Default is: show output of base_cpu only */ if (bitmask_isallclear(cpus_chosen)) - bitmask_setbit(cpus_chosen, 0); + bitmask_setbit(cpus_chosen, base_cpu); /* Add more per cpu options here */ if (!params.perf_bias) diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 8ac3304a9957..9ec973165af1 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -54,6 +54,7 @@ static struct cmd_struct commands[] = { { "frequency-set", cmd_freq_set, 1 }, { "idle-info", cmd_idle_info, 0 }, { "idle-set", cmd_idle_set, 1 }, + { "powercap-info", cmd_cap_info, 0 }, { "set", cmd_set, 1 }, { "info", cmd_info, 0 }, { "monitor", cmd_monitor, 0 }, diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 7c77045fef52..075e766ff1f3 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -459,9 +459,10 @@ int cmd_monitor(int argc, char **argv) print_results(1, cpu); } - for (num = 0; num < avail_monitors; num++) - monitors[num]->unregister(); - + for (num = 0; num < avail_monitors; num++) { + if (monitors[num]->unregister) + monitors[num]->unregister(); + } cpu_topology_release(cpu_top); return 0; } diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def index 0d6ba4dbb9c7..7c926e90c87e 100644 --- a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def +++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def @@ -4,5 +4,6 @@ DEF(intel_nhm) DEF(intel_snb) DEF(intel_hsw_ext) DEF(mperf) +DEF(rapl) #endif DEF(cpuidle_sysfs) diff --git a/tools/power/cpupower/utils/idle_monitor/rapl_monitor.c b/tools/power/cpupower/utils/idle_monitor/rapl_monitor.c new file mode 100644 index 000000000000..46153f1291c3 --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/rapl_monitor.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (C) 2016 SUSE Software Solutions GmbH + * Thomas Renninger <trenn@suse.de> + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <time.h> +#include <string.h> + +#include <pci/pci.h> + +#include "idle_monitor/cpupower-monitor.h" +#include "helpers/helpers.h" +#include "powercap.h" + +#define MAX_RAPL_ZONES 10 + +int rapl_zone_count; +cstate_t rapl_zones[MAX_RAPL_ZONES]; +struct powercap_zone *rapl_zones_pt[MAX_RAPL_ZONES] = { 0 }; + +unsigned long long rapl_zone_previous_count[MAX_RAPL_ZONES]; +unsigned long long rapl_zone_current_count[MAX_RAPL_ZONES]; +unsigned long long rapl_max_count; + +static int rapl_get_count_uj(unsigned int id, unsigned long long *count, + unsigned int cpu) +{ + if (rapl_zones_pt[id] == NULL) + /* error */ + return -1; + + *count = rapl_zone_current_count[id] - rapl_zone_previous_count[id]; + + return 0; +} + +static int powercap_count_zones(struct powercap_zone *zone) +{ + uint64_t val; + int uj; + + if (rapl_zone_count >= MAX_RAPL_ZONES) + return -1; + + if (!zone->has_energy_uj) + return 0; + + printf("%s\n", zone->sys_name); + uj = powercap_get_energy_uj(zone, &val); + printf("%d\n", uj); + + strncpy(rapl_zones[rapl_zone_count].name, zone->name, CSTATE_NAME_LEN - 1); + strcpy(rapl_zones[rapl_zone_count].desc, ""); + rapl_zones[rapl_zone_count].id = rapl_zone_count; + rapl_zones[rapl_zone_count].range = RANGE_MACHINE; + rapl_zones[rapl_zone_count].get_count = rapl_get_count_uj; + rapl_zones_pt[rapl_zone_count] = zone; + rapl_zone_count++; + + return 0; +} + +static int rapl_start(void) +{ + int i, ret; + uint64_t uj_val; + + for (i = 0; i < rapl_zone_count; i++) { + ret = powercap_get_energy_uj(rapl_zones_pt[i], &uj_val); + if (ret) + return ret; + rapl_zone_previous_count[i] = uj_val; + } + + return 0; +} + +static int rapl_stop(void) +{ + int i; + uint64_t uj_val; + + for (i = 0; i < rapl_zone_count; i++) { + int ret; + + ret = powercap_get_energy_uj(rapl_zones_pt[i], &uj_val); + if (ret) + return ret; + rapl_zone_current_count[i] = uj_val; + if (rapl_max_count < uj_val) + rapl_max_count = uj_val - rapl_zone_previous_count[i]; + } + return 0; +} + +struct cpuidle_monitor *rapl_register(void) +{ + struct powercap_zone *root_zone; + char line[MAX_LINE_LEN] = ""; + int ret, val; + + ret = powercap_get_driver(line, MAX_LINE_LEN); + if (ret < 0) { + dprint("No powercapping driver loaded\n"); + return NULL; + } + + dprint("Driver: %s\n", line); + ret = powercap_get_enabled(&val); + if (ret < 0) + return NULL; + if (!val) { + dprint("Powercapping is disabled\n"); + return NULL; + } + + dprint("Powercap domain hierarchy:\n\n"); + root_zone = powercap_init_zones(); + + if (root_zone == NULL) { + dprint("No powercap info found\n"); + return NULL; + } + + powercap_walk_zones(root_zone, powercap_count_zones); + rapl_monitor.hw_states_num = rapl_zone_count; + + return &rapl_monitor; +} + +struct cpuidle_monitor rapl_monitor = { + .name = "RAPL", + .hw_states = rapl_zones, + .hw_states_num = 0, + .start = rapl_start, + .stop = rapl_stop, + .do_register = rapl_register, + .flags.needs_root = 0, + .overflow_s = 60 * 60 * 24 * 100, /* To be implemented */ +}; + +#endif diff --git a/tools/power/cpupower/utils/powercap-info.c b/tools/power/cpupower/utils/powercap-info.c new file mode 100644 index 000000000000..3ea4486f1a0e --- /dev/null +++ b/tools/power/cpupower/utils/powercap-info.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (C) 2016 SUSE Software Solutions GmbH + * Thomas Renninger <trenn@suse.de> + */ + +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +#include <getopt.h> + +#include "powercap.h" +#include "helpers/helpers.h" + +int powercap_show_all; + +static struct option info_opts[] = { + { "all", no_argument, NULL, 'a'}, + { }, +}; + +static int powercap_print_one_zone(struct powercap_zone *zone) +{ + int mode, i, ret = 0; + char pr_prefix[1024] = ""; + + for (i = 0; i < zone->tree_depth && i < POWERCAP_MAX_TREE_DEPTH; i++) + strcat(pr_prefix, "\t"); + + printf("%sZone: %s", pr_prefix, zone->name); + ret = powercap_zone_get_enabled(zone, &mode); + if (ret < 0) + return ret; + printf(" (%s)\n", mode ? "enabled" : "disabled"); + + if (zone->has_power_uw) + printf(_("%sPower can be monitored in micro Jules\n"), + pr_prefix); + + if (zone->has_energy_uj) + printf(_("%sPower can be monitored in micro Watts\n"), + pr_prefix); + + printf("\n"); + + if (ret != 0) + return ret; + return ret; +} + +static int powercap_show(void) +{ + struct powercap_zone *root_zone; + char line[MAX_LINE_LEN] = ""; + int ret, val; + + ret = powercap_get_driver(line, MAX_LINE_LEN); + if (ret < 0) { + printf(_("No powercapping driver loaded\n")); + return ret; + } + + printf("Driver: %s\n", line); + ret = powercap_get_enabled(&val); + if (ret < 0) + return ret; + if (!val) { + printf(_("Powercapping is disabled\n")); + return -1; + } + + printf(_("Powercap domain hierarchy:\n\n")); + root_zone = powercap_init_zones(); + + if (root_zone == NULL) { + printf(_("No powercap info found\n")); + return 1; + } + + powercap_walk_zones(root_zone, powercap_print_one_zone); + + return 0; +} + +int cmd_cap_set(int argc, char **argv) +{ + return 0; +}; +int cmd_cap_info(int argc, char **argv) +{ + int ret = 0, cont = 1; + + do { + ret = getopt_long(argc, argv, "a", info_opts, NULL); + switch (ret) { + case '?': + cont = 0; + break; + case -1: + cont = 0; + break; + case 'a': + powercap_show_all = 1; + break; + default: + fprintf(stderr, _("invalid or unknown argument\n")); + return EXIT_FAILURE; + } + } while (cont); + + powercap_show(); + return 0; +} diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index cfe343306e08..c60c90f35d18 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -1462,7 +1462,7 @@ class Data: 'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*', 'ABORT' : r'(?i).*\bABORT\b.*', 'IRQ' : r'.*\bgenirq: .*', - 'TASKFAIL': r'.*Freezing of tasks *.*', + 'TASKFAIL': r'.*Freezing .*after *.*', 'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*', 'DISKFULL': r'.*\bNo space left on device.*', 'USBERR' : r'.*usb .*device .*, error [0-9-]*', diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 500be85729cc..0805f08af8b3 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -10,6 +10,7 @@ ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready ldflags-y += --wrap=cxl_hdm_decode_init +ldflags-y += --wrap=cxl_rcrb_to_component DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -26,6 +27,7 @@ cxl_acpi-y += config_check.o obj-m += cxl_pmem.o cxl_pmem-y := $(CXL_SRC)/pmem.o +cxl_pmem-y += $(CXL_SRC)/security.o cxl_pmem-y += config_check.o obj-m += cxl_port.o diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c index de5e5b3652fd..c4c457e59841 100644 --- a/tools/testing/cxl/config_check.c +++ b/tools/testing/cxl/config_check.c @@ -10,4 +10,6 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_BUS)); BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_ACPI)); BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)); } diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 7edce12fd2ce..30ee680d38ff 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -11,11 +11,15 @@ #include <cxlmem.h> #include "mock.h" +static int interleave_arithmetic; + #define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_SINGLE_HOST 1 +#define NR_CXL_RCH 1 #define NR_CXL_ROOT_PORTS 2 #define NR_CXL_SWITCH_PORTS 2 #define NR_CXL_PORT_DECODERS 8 +#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH) static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; @@ -35,6 +39,8 @@ static struct platform_device *cxl_swd_single[NR_MEM_SINGLE]; struct platform_device *cxl_mem[NR_MEM_MULTI]; struct platform_device *cxl_mem_single[NR_MEM_SINGLE]; +static struct platform_device *cxl_rch[NR_CXL_RCH]; +static struct platform_device *cxl_rcd[NR_CXL_RCH]; static inline bool is_multi_bridge(struct device *dev) { @@ -57,7 +63,7 @@ static inline bool is_single_bridge(struct device *dev) } static struct acpi_device acpi0017_mock; -static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] = { +static struct acpi_device host_bridge[NR_BRIDGES] = { [0] = { .handle = &host_bridge[0], }, @@ -67,7 +73,9 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] [2] = { .handle = &host_bridge[2], }, - + [3] = { + .handle = &host_bridge[3], + }, }; static bool is_mock_dev(struct device *dev) @@ -80,6 +88,9 @@ static bool is_mock_dev(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) if (dev == &cxl_mem_single[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) + if (dev == &cxl_rcd[i]->dev) + return true; if (dev == &cxl_acpi->dev) return true; return false; @@ -101,7 +112,7 @@ static bool is_mock_adev(struct acpi_device *adev) static struct { struct acpi_table_cedt cedt; - struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST]; + struct acpi_cedt_chbs chbs[NR_BRIDGES]; struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; @@ -122,6 +133,26 @@ static struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; } cfmws4; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws5; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws6; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[2]; + } cfmws7; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[4]; + } cfmws8; + struct { + struct acpi_cedt_cxims cxims; + u64 xormap_list[2]; + } cxims0; } __packed mock_cedt = { .cedt = { .header = { @@ -154,6 +185,14 @@ static struct { .uid = 2, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, + .chbs[3] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 3, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL11, + }, .cfmws0 = { .cfmws = { .header = { @@ -229,6 +268,81 @@ static struct { }, .target = { 2 }, }, + .cfmws5 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws5), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, + .qtg_id = 5, + .window_size = SZ_256M, + }, + .target = { 3 }, + }, + /* .cfmws6,7,8 use ACPI_CEDT_CFMWS_ARITHMETIC_XOR */ + .cfmws6 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws6), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 0, + .window_size = SZ_256M * 8UL, + }, + .target = { 0, }, + }, + .cfmws7 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws7), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 1, + .granularity = 0, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 1, + .window_size = SZ_256M * 8UL, + }, + .target = { 0, 1, }, + }, + .cfmws8 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws8), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 2, + .granularity = 0, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 0, + .window_size = SZ_256M * 16UL, + }, + .target = { 0, 1, 0, 1, }, + }, + .cxims0 = { + .cxims = { + .header = { + .type = ACPI_CEDT_TYPE_CXIMS, + .length = sizeof(mock_cedt.cxims0), + }, + .hbig = 0, + .nr_xormaps = 2, + }, + .xormap_list = { 0x404100, 0x808200, }, + }, }; struct acpi_cedt_cfmws *mock_cfmws[] = { @@ -237,6 +351,22 @@ struct acpi_cedt_cfmws *mock_cfmws[] = { [2] = &mock_cedt.cfmws2.cfmws, [3] = &mock_cedt.cfmws3.cfmws, [4] = &mock_cedt.cfmws4.cfmws, + [5] = &mock_cedt.cfmws5.cfmws, + /* Modulo Math above, XOR Math below */ + [6] = &mock_cedt.cfmws6.cfmws, + [7] = &mock_cedt.cfmws7.cfmws, + [8] = &mock_cedt.cfmws8.cfmws, +}; + +static int cfmws_start; +static int cfmws_end; +#define CFMWS_MOD_ARRAY_START 0 +#define CFMWS_MOD_ARRAY_END 5 +#define CFMWS_XOR_ARRAY_START 6 +#define CFMWS_XOR_ARRAY_END 8 + +struct acpi_cedt_cxims *mock_cxims[1] = { + [0] = &mock_cedt.cxims0.cxims, }; struct cxl_mock_res { @@ -262,11 +392,11 @@ static void depopulate_all_mock_resources(void) mutex_unlock(&mock_res_lock); } -static struct cxl_mock_res *alloc_mock_res(resource_size_t size) +static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align) { struct cxl_mock_res *res = kzalloc(sizeof(*res), GFP_KERNEL); struct genpool_data_align data = { - .align = SZ_256M, + .align = align, }; unsigned long phys; @@ -301,17 +431,17 @@ static int populate_cedt(void) else size = ACPI_CEDT_CHBS_LENGTH_CXL11; - res = alloc_mock_res(size); + res = alloc_mock_res(size, size); if (!res) return -ENOMEM; chbs->base = res->range.start; chbs->length = size; } - for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + for (i = cfmws_start; i <= cfmws_end; i++) { struct acpi_cedt_cfmws *window = mock_cfmws[i]; - res = alloc_mock_res(window->window_size); + res = alloc_mock_res(window->window_size, SZ_256M); if (!res) return -ENOMEM; window->base_hpa = res->range.start; @@ -320,10 +450,12 @@ static int populate_cedt(void) return 0; } +static bool is_mock_port(struct device *dev); + /* - * WARNING, this hack assumes the format of 'struct - * cxl_cfmws_context' and 'struct cxl_chbs_context' share the property that - * the first struct member is the device being probed by the cxl_acpi + * WARNING, this hack assumes the format of 'struct cxl_cfmws_context' + * and 'struct cxl_chbs_context' share the property that the first + * struct member is a cxl_test device being probed by the cxl_acpi * driver. */ struct cxl_cedt_context { @@ -340,7 +472,7 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, unsigned long end; int i; - if (dev != &cxl_acpi->dev) + if (!is_mock_port(dev) && !is_mock_dev(dev)) return acpi_table_parse_cedt(id, handler_arg, arg); if (id == ACPI_CEDT_TYPE_CHBS) @@ -351,12 +483,19 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, } if (id == ACPI_CEDT_TYPE_CFMWS) - for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + for (i = cfmws_start; i <= cfmws_end; i++) { h = (union acpi_subtable_headers *) mock_cfmws[i]; end = (unsigned long) h + mock_cfmws[i]->header.length; handler_arg(h, arg, end); } + if (id == ACPI_CEDT_TYPE_CXIMS) + for (i = 0; i < ARRAY_SIZE(mock_cxims); i++) { + h = (union acpi_subtable_headers *)mock_cxims[i]; + end = (unsigned long)h + mock_cxims[i]->header.length; + handler_arg(h, arg, end); + } + return 0; } @@ -370,6 +509,10 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) if (dev == &cxl_hb_single[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) + if (dev == &cxl_rch[i]->dev) + return true; + return false; } @@ -439,14 +582,21 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, return AE_OK; } -static struct pci_bus mock_pci_bus[NR_CXL_HOST_BRIDGES]; -static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { +static struct pci_bus mock_pci_bus[NR_BRIDGES]; +static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = { [0] = { .bus = &mock_pci_bus[0], }, [1] = { .bus = &mock_pci_bus[1], }, + [2] = { + .bus = &mock_pci_bus[2], + }, + [3] = { + .bus = &mock_pci_bus[3], + }, + }; static bool is_mock_bus(struct pci_bus *bus) @@ -634,7 +784,6 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) static int mock_cxl_port_enumerate_dports(struct cxl_port *port) { - struct device *dev = &port->dev; struct platform_device **array; int i, array_size; @@ -684,19 +833,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); - if (IS_ERR(dport)) { - dev_err(dev, "failed to add dport: %s (%ld)\n", - dev_name(&pdev->dev), PTR_ERR(dport)); + if (IS_ERR(dport)) return PTR_ERR(dport); - } - - dev_dbg(dev, "add dport%d: %s\n", pdev->id, - dev_name(&pdev->dev)); } return 0; } +resource_size_t mock_cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which) +{ + dev_dbg(dev, "rcrb: %pa which: %d\n", &rcrb, which); + + return (resource_size_t) which + 1; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -705,6 +857,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .is_mock_dev = is_mock_dev, .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, + .cxl_rcrb_to_component = mock_cxl_rcrb_to_component, .acpi_pci_find_root = mock_acpi_pci_find_root, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, .devm_cxl_setup_hdm = mock_cxl_setup_hdm, @@ -729,6 +882,87 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_512G (SZ_64G * 8) #endif +static __init int cxl_rch_init(void) +{ + int rc, i; + + for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) { + int idx = NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + i; + struct acpi_device *adev = &host_bridge[idx]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_host_bridge", idx); + if (!pdev) + goto err_bridge; + + mock_companion(adev, &pdev->dev); + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_bridge; + } + + cxl_rch[i] = pdev; + mock_pci_bus[idx].bridge = &pdev->dev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "firmware_node"); + if (rc) + goto err_bridge; + } + + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { + int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i; + struct platform_device *rch = cxl_rch[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_rcd", idx); + if (!pdev) + goto err_mem; + pdev->dev.parent = &rch->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_rcd[i] = pdev; + } + + return 0; + +err_mem: + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); +err_bridge: + for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_rch[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "firmware_node"); + platform_device_unregister(cxl_rch[i]); + } + + return rc; +} + +static void cxl_rch_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); + for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_rch[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "firmware_node"); + platform_device_unregister(cxl_rch[i]); + } +} + static __init int cxl_single_init(void) { int i, rc; @@ -751,6 +985,7 @@ static __init int cxl_single_init(void) } cxl_hb_single[i] = pdev; + mock_pci_bus[i + NR_CXL_HOST_BRIDGES].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); if (rc) @@ -897,6 +1132,16 @@ static __init int cxl_test_init(void) if (rc) goto err_gen_pool_add; + if (interleave_arithmetic == 1) { + cfmws_start = CFMWS_XOR_ARRAY_START; + cfmws_end = CFMWS_XOR_ARRAY_END; + dev_dbg(NULL, "cxl_test loading xor math option\n"); + } else { + cfmws_start = CFMWS_MOD_ARRAY_START; + cfmws_end = CFMWS_MOD_ARRAY_END; + dev_dbg(NULL, "cxl_test loading modulo math option\n"); + } + rc = populate_cedt(); if (rc) goto err_populate; @@ -917,6 +1162,7 @@ static __init int cxl_test_init(void) } cxl_host_bridge[i] = pdev; + mock_pci_bus[i].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); if (rc) @@ -999,9 +1245,13 @@ static __init int cxl_test_init(void) if (rc) goto err_mem; + rc = cxl_rch_init(); + if (rc) + goto err_single; + cxl_acpi = platform_device_alloc("cxl_acpi", 0); if (!cxl_acpi) - goto err_single; + goto err_rch; mock_companion(&acpi0017_mock, &cxl_acpi->dev); acpi0017_mock.dev.bus = &platform_bus_type; @@ -1014,6 +1264,8 @@ static __init int cxl_test_init(void) err_add: platform_device_put(cxl_acpi); +err_rch: + cxl_rch_exit(); err_single: cxl_single_exit(); err_mem: @@ -1051,6 +1303,7 @@ static __exit void cxl_test_exit(void) int i; platform_device_unregister(cxl_acpi); + cxl_rch_exit(); cxl_single_exit(); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); @@ -1073,6 +1326,8 @@ static __exit void cxl_test_exit(void) unregister_cxl_mock_ops(&cxl_mock_ops); } +module_param(interleave_arithmetic, int, 0000); +MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index aa2df3a15051..5e4ecd93f1d2 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -65,6 +65,18 @@ static struct { }, }; +#define PASS_TRY_LIMIT 3 + +struct cxl_mockmem_data { + void *lsa; + u32 security_state; + u8 user_pass[NVDIMM_PASSPHRASE_LEN]; + u8 master_pass[NVDIMM_PASSPHRASE_LEN]; + int user_limit; + int master_limit; + +}; + static int mock_gsl(struct cxl_mbox_cmd *cmd) { if (cmd->size_out < sizeof(mock_gsl_payload)) @@ -100,6 +112,24 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } +static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_identify id = { + .fw_revision = { "mock fw v1 " }, + .total_capacity = + cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER), + .volatile_capacity = + cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER), + }; + + if (cmd->size_out < sizeof(id)) + return -EINVAL; + + memcpy(cmd->payload_out, &id, sizeof(id)); + + return 0; +} + static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_identify id = { @@ -137,10 +167,334 @@ static int mock_partition_info(struct cxl_dev_state *cxlds, return 0; } +static int mock_get_security_state(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in) + return -EINVAL; + + if (cmd->size_out != sizeof(u32)) + return -EINVAL; + + memcpy(cmd->payload_out, &mdata->security_state, sizeof(u32)); + + return 0; +} + +static void master_plimit_check(struct cxl_mockmem_data *mdata) +{ + if (mdata->master_limit == PASS_TRY_LIMIT) + return; + mdata->master_limit++; + if (mdata->master_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PLIMIT; +} + +static void user_plimit_check(struct cxl_mockmem_data *mdata) +{ + if (mdata->user_limit == PASS_TRY_LIMIT) + return; + mdata->user_limit++; + if (mdata->user_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT; +} + +static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_set_pass *set_pass; + + if (cmd->size_in != sizeof(*set_pass)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + set_pass = cmd->payload_in; + switch (set_pass->type) { + case CXL_PMEM_SEC_PASS_MASTER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + /* + * CXL spec rev3.0 8.2.9.8.6.2, The master pasphrase shall only be set in + * the security disabled state when the user passphrase is not set. + */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + if (memcmp(mdata->master_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + memcpy(mdata->master_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN); + mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PASS_SET; + return 0; + + case CXL_PMEM_SEC_PASS_USER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + if (memcmp(mdata->user_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + memcpy(mdata->user_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN); + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PASS_SET; + return 0; + + default: + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + } + return -EINVAL; +} + +static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_disable_pass *dis_pass; + + if (cmd->size_in != sizeof(*dis_pass)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + dis_pass = cmd->payload_in; + switch (dis_pass->type) { + case CXL_PMEM_SEC_PASS_MASTER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(dis_pass->pass, mdata->master_pass, NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->master_limit = 0; + memset(mdata->master_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~CXL_PMEM_SEC_STATE_MASTER_PASS_SET; + return 0; + + case CXL_PMEM_SEC_PASS_USER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(dis_pass->pass, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->user_limit = 0; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~(CXL_PMEM_SEC_STATE_USER_PASS_SET | + CXL_PMEM_SEC_STATE_LOCKED); + return 0; + + default: + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + return 0; +} + +static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in != 0) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) + return 0; + + mdata->security_state |= CXL_PMEM_SEC_STATE_FROZEN; + return 0; +} + +static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in != NVDIMM_PASSPHRASE_LEN) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(cmd->payload_in, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) { + if (++mdata->user_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT; + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED; + return 0; +} + +static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_pass_erase *erase; + + if (cmd->size_in != sizeof(*erase)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + erase = cmd->payload_in; + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT && + erase->type == CXL_PMEM_SEC_PASS_USER) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT && + erase->type == CXL_PMEM_SEC_PASS_MASTER) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + switch (erase->type) { + case CXL_PMEM_SEC_PASS_MASTER: + /* + * The spec does not clearly define the behavior of the scenario + * where a master passphrase is passed in while the master + * passphrase is not set and user passphrase is not set. The + * code will take the assumption that it will behave the same + * as a CXL secure erase command without passphrase (0x4401). + */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET) { + if (memcmp(mdata->master_pass, erase->pass, + NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + mdata->master_limit = 0; + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED; + } else { + /* + * CXL rev3 8.2.9.8.6.3 Disable Passphrase + * When master passphrase is disabled, the device shall + * return Invalid Input for the Passphrase Secure Erase + * command with master passphrase. + */ + return -EINVAL; + } + /* Scramble encryption keys so that data is effectively erased */ + break; + case CXL_PMEM_SEC_PASS_USER: + /* + * The spec does not clearly define the behavior of the scenario + * where a user passphrase is passed in while the user + * passphrase is not set. The code will take the assumption that + * it will behave the same as a CXL secure erase command without + * passphrase (0x4401). + */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + if (memcmp(mdata->user_pass, erase->pass, + NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + } + + /* + * CXL rev3 Table 8-118 + * If user passphrase is not set or supported by device, current + * passphrase value is ignored. Will make the assumption that + * the operation will proceed as secure erase w/o passphrase + * since spec is not explicit. + */ + + /* Scramble encryption keys so that data is effectively erased */ + break; + default: + return -EINVAL; + } + + return 0; +} + static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlds->dev); + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + void *lsa = mdata->lsa; u32 offset, length; if (sizeof(*get_lsa) > cmd->size_in) @@ -159,7 +513,8 @@ static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlds->dev); + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + void *lsa = mdata->lsa; u32 offset, length; if (sizeof(*set_lsa) > cmd->size_in) @@ -216,7 +571,10 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * rc = mock_get_log(cxlds, cmd); break; case CXL_MBOX_OP_IDENTIFY: - rc = mock_id(cxlds, cmd); + if (cxlds->rcd) + rc = mock_rcd_id(cxlds, cmd); + else + rc = mock_id(cxlds, cmd); break; case CXL_MBOX_OP_GET_LSA: rc = mock_get_lsa(cxlds, cmd); @@ -230,6 +588,24 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * case CXL_MBOX_OP_GET_HEALTH_INFO: rc = mock_health_info(cxlds, cmd); break; + case CXL_MBOX_OP_GET_SECURITY_STATE: + rc = mock_get_security_state(cxlds, cmd); + break; + case CXL_MBOX_OP_SET_PASSPHRASE: + rc = mock_set_passphrase(cxlds, cmd); + break; + case CXL_MBOX_OP_DISABLE_PASSPHRASE: + rc = mock_disable_passphrase(cxlds, cmd); + break; + case CXL_MBOX_OP_FREEZE_SECURITY: + rc = mock_freeze_security(cxlds, cmd); + break; + case CXL_MBOX_OP_UNLOCK: + rc = mock_unlock_security(cxlds, cmd); + break; + case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: + rc = mock_passphrase_secure_erase(cxlds, cmd); + break; default: break; } @@ -245,21 +621,32 @@ static void label_area_release(void *lsa) vfree(lsa); } +static bool is_rcd(struct platform_device *pdev) +{ + const struct platform_device_id *id = platform_get_device_id(pdev); + + return !!id->driver_data; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct cxl_memdev *cxlmd; struct cxl_dev_state *cxlds; - void *lsa; + struct cxl_mockmem_data *mdata; int rc; - lsa = vmalloc(LSA_SIZE); - if (!lsa) + mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); + if (!mdata) return -ENOMEM; - rc = devm_add_action_or_reset(dev, label_area_release, lsa); + dev_set_drvdata(dev, mdata); + + mdata->lsa = vmalloc(LSA_SIZE); + if (!mdata->lsa) + return -ENOMEM; + rc = devm_add_action_or_reset(dev, label_area_release, mdata->lsa); if (rc) return rc; - dev_set_drvdata(dev, lsa); cxlds = cxl_dev_state_create(dev); if (IS_ERR(cxlds)) @@ -268,6 +655,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; cxlds->payload_size = SZ_4K; + if (is_rcd(pdev)) { + cxlds->rcd = true; + cxlds->component_reg_phys = CXL_RESOURCE_NONE; + } rc = cxl_enumerate_cmds(cxlds); if (rc) @@ -285,14 +676,51 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) - rc = devm_cxl_add_nvdimm(dev, cxlmd); - return 0; } +static ssize_t security_lock_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%u\n", + !!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)); +} + +static ssize_t security_lock_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + u32 mask = CXL_PMEM_SEC_STATE_FROZEN | CXL_PMEM_SEC_STATE_USER_PLIMIT | + CXL_PMEM_SEC_STATE_MASTER_PLIMIT; + int val; + + if (kstrtoint(buf, 0, &val) < 0) + return -EINVAL; + + if (val == 1) { + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) + return -ENXIO; + mdata->security_state |= CXL_PMEM_SEC_STATE_LOCKED; + mdata->security_state &= ~mask; + } else { + return -EINVAL; + } + return count; +} + +static DEVICE_ATTR_RW(security_lock); + +static struct attribute *cxl_mock_mem_attrs[] = { + &dev_attr_security_lock.attr, + NULL +}; +ATTRIBUTE_GROUPS(cxl_mock_mem); + static const struct platform_device_id cxl_mock_mem_ids[] = { - { .name = "cxl_mem", }, + { .name = "cxl_mem", 0 }, + { .name = "cxl_rcd", 1 }, { }, }; MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids); @@ -302,6 +730,7 @@ static struct platform_driver cxl_mock_mem_driver = { .id_table = cxl_mock_mem_ids, .driver = { .name = KBUILD_MODNAME, + .dev_groups = cxl_mock_mem_groups, }, }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index bce6a21df0d5..5dface08e0de 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -224,6 +224,25 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); +resource_size_t __wrap_cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which) +{ + int index; + resource_size_t component_reg_phys; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(dev)) + component_reg_phys = + ops->cxl_rcrb_to_component(dev, rcrb, which); + else + component_reg_phys = cxl_rcrb_to_component(dev, rcrb, which); + put_cxl_mock_ops(index); + + return component_reg_phys; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcrb_to_component, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 738f24e3988a..ef33f159375e 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -15,6 +15,9 @@ struct cxl_mock_ops { acpi_string pathname, struct acpi_object_list *arguments, unsigned long long *data); + resource_size_t (*cxl_rcrb_to_component)(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which); struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); bool (*is_mock_bus)(struct pci_bus *bus); bool (*is_mock_port)(struct device *dev); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 09d1578f9d66..ac59999ed3de 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -802,7 +802,14 @@ sub process_variables { my $end = $3; # append beginning of value to retval $retval = "$retval$begin"; - if (defined($variable{$var})) { + if ($var =~ s/^shell\s+//) { + $retval = `$var`; + if ($?) { + doprint "WARNING: $var returned an error\n"; + } else { + chomp $retval; + } + } elsif (defined($variable{$var})) { $retval = "$retval$variable{$var}"; } elsif (defined($remove_undef) && $remove_undef) { # for if statements, any variable that is not defined, @@ -1963,7 +1970,7 @@ sub run_scp_mod { sub _get_grub_index { - my ($command, $target, $skip) = @_; + my ($command, $target, $skip, $submenu) = @_; return if (defined($grub_number) && defined($last_grub_menu) && $last_grub_menu eq $grub_menu && defined($last_machine) && @@ -1980,11 +1987,16 @@ sub _get_grub_index { my $found = 0; + my $submenu_number = 0; + while (<IN>) { if (/$target/) { $grub_number++; $found = 1; last; + } elsif (defined($submenu) && /$submenu/) { + $submenu_number++; + $grub_number = -1; } elsif (/$skip/) { $grub_number++; } @@ -1993,6 +2005,9 @@ sub _get_grub_index { dodie "Could not find '$grub_menu' through $command on $machine" if (!$found); + if ($submenu_number > 0) { + $grub_number = "$submenu_number>$grub_number"; + } doprint "$grub_number\n"; $last_grub_menu = $grub_menu; $last_machine = $machine; @@ -2003,6 +2018,7 @@ sub get_grub_index { my $command; my $target; my $skip; + my $submenu; my $grub_menu_qt; if ($reboot_type !~ /^grub/) { @@ -2017,8 +2033,9 @@ sub get_grub_index { $skip = '^\s*title\s'; } elsif ($reboot_type eq "grub2") { $command = "cat $grub_file"; - $target = '^menuentry.*' . $grub_menu_qt; - $skip = '^menuentry\s|^submenu\s'; + $target = '^\s*menuentry.*' . $grub_menu_qt; + $skip = '^\s*menuentry'; + $submenu = '^\s*submenu\s'; } elsif ($reboot_type eq "grub2bls") { $command = $grub_bls_get; $target = '^title=.*' . $grub_menu_qt; @@ -2027,7 +2044,7 @@ sub get_grub_index { return; } - _get_grub_index($command, $target, $skip); + _get_grub_index($command, $target, $skip, $submenu); } sub wait_for_input { @@ -2090,7 +2107,7 @@ sub reboot_to { if ($reboot_type eq "grub") { run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) { - run_ssh "$grub_reboot $grub_number"; + run_ssh "$grub_reboot \"'$grub_number'\""; } elsif ($reboot_type eq "syslinux") { run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path"; } elsif (defined $reboot_script) { @@ -3768,9 +3785,10 @@ sub test_this_config { # .config to make sure it is missing the config that # we had before my %configs = %min_configs; - delete $configs{$config}; + $configs{$config} = "# $config is not set"; make_new_config ((values %configs), (values %keep_configs)); make_oldconfig; + delete $configs{$config}; undef %configs; assign_configs \%configs, $output_config; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index 5e7d1d729752..2d0fe15a096d 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -259,6 +259,14 @@ # If PATH is not a config variable, then the ${PATH} in # the MAKE_CMD option will be evaluated by the shell when # the MAKE_CMD option is passed into shell processing. +# +# Shell commands can also be inserted with the ${shell <command>} +# expression. Note, this is case sensitive, thus ${SHELL <command>} +# will not work. +# +# HOSTNAME := ${shell hostname} +# DEFAULTS IF "${HOSTNAME}" == "frodo" +# #### Using options in other options #### # diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 4d4663fb578b..43fbe96318fe 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -192,28 +192,30 @@ def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]: parse_start = time.time() - test_result = kunit_parser.Test() - if request.raw_output: # Treat unparsed results as one passing test. - test_result.status = kunit_parser.TestStatus.SUCCESS - test_result.counts.passed = 1 + fake_test = kunit_parser.Test() + fake_test.status = kunit_parser.TestStatus.SUCCESS + fake_test.counts.passed = 1 output: Iterable[str] = input_data if request.raw_output == 'all': pass elif request.raw_output == 'kunit': - output = kunit_parser.extract_tap_lines(output, lstrip=False) + output = kunit_parser.extract_tap_lines(output) for line in output: print(line.rstrip()) + parse_time = time.time() - parse_start + return KunitResult(KunitStatus.SUCCESS, parse_time), fake_test - else: - test_result = kunit_parser.parse_run_tests(input_data) - parse_end = time.time() + + # Actually parse the test results. + test = kunit_parser.parse_run_tests(input_data) + parse_time = time.time() - parse_start if request.json: json_str = kunit_json.get_json_result( - test=test_result, + test=test, metadata=metadata) if request.json == 'stdout': print(json_str) @@ -223,10 +225,10 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input stdout.print_with_timestamp("Test results stored in %s" % os.path.abspath(request.json)) - if test_result.status != kunit_parser.TestStatus.SUCCESS: - return KunitResult(KunitStatus.TEST_FAILURE, parse_end - parse_start), test_result + if test.status != kunit_parser.TestStatus.SUCCESS: + return KunitResult(KunitStatus.TEST_FAILURE, parse_time), test - return KunitResult(KunitStatus.SUCCESS, parse_end - parse_start), test_result + return KunitResult(KunitStatus.SUCCESS, parse_time), test def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: @@ -359,14 +361,14 @@ def add_exec_opts(parser) -> None: choices=['suite', 'test']) def add_parse_opts(parser) -> None: - parser.add_argument('--raw_output', help='If set don\'t format output from kernel. ' - 'If set to --raw_output=kunit, filters to just KUnit output.', + parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. ' + 'By default, filters to just KUnit output. Use ' + '--raw_output=all to show everything', type=str, nargs='?', const='all', default=None, choices=['all', 'kunit']) parser.add_argument('--json', nargs='?', - help='Stores test results in a JSON, and either ' - 'prints to stdout or saves to file if a ' - 'filename is specified', + help='Prints parsed test results as JSON to stdout or a file if ' + 'a filename is specified. Does nothing if --raw_output is set.', type=str, const='stdout', default=None, metavar='FILE') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 1ae873e3e341..a225799f6b1b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -10,8 +10,10 @@ # Author: Rae Moar <rmoar@google.com> from __future__ import annotations +from dataclasses import dataclass import re import sys +import textwrap from enum import Enum, auto from typing import Iterable, Iterator, List, Optional, Tuple @@ -58,6 +60,10 @@ class Test: self.counts.errors += 1 stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') + def ok_status(self) -> bool: + """Returns true if the status was ok, i.e. passed or skipped.""" + return self.status in (TestStatus.SUCCESS, TestStatus.SKIPPED) + class TestStatus(Enum): """An enumeration class to represent the status of a test.""" SUCCESS = auto() @@ -67,27 +73,17 @@ class TestStatus(Enum): NO_TESTS = auto() FAILURE_TO_PARSE_TESTS = auto() +@dataclass class TestCounts: """ Tracks the counts of statuses of all test cases and any errors within a Test. - - Attributes: - passed : int - the number of tests that have passed - failed : int - the number of tests that have failed - crashed : int - the number of tests that have crashed - skipped : int - the number of tests that have skipped - errors : int - the number of errors in the test and subtests - """ - def __init__(self): - """Creates TestCounts object with counts of all test - statuses and test errors set to 0. - """ - self.passed = 0 - self.failed = 0 - self.crashed = 0 - self.skipped = 0 - self.errors = 0 + """ + passed: int = 0 + failed: int = 0 + crashed: int = 0 + skipped: int = 0 + errors: int = 0 def __str__(self) -> str: """Returns the string representation of a TestCounts object.""" @@ -213,12 +209,12 @@ class LineStream: # Parsing helper methods: -KTAP_START = re.compile(r'KTAP version ([0-9]+)$') -TAP_START = re.compile(r'TAP version ([0-9]+)$') -KTAP_END = re.compile('(List of all partitions:|' +KTAP_START = re.compile(r'\s*KTAP version ([0-9]+)$') +TAP_START = re.compile(r'\s*TAP version ([0-9]+)$') +KTAP_END = re.compile(r'\s*(List of all partitions:|' 'Kernel panic - not syncing: VFS:|reboot: System halted)') -def extract_tap_lines(kernel_output: Iterable[str], lstrip=True) -> LineStream: +def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream: """Extracts KTAP lines from the kernel output.""" def isolate_ktap_output(kernel_output: Iterable[str]) \ -> Iterator[Tuple[int, str]]: @@ -244,11 +240,8 @@ def extract_tap_lines(kernel_output: Iterable[str], lstrip=True) -> LineStream: # stop extracting KTAP lines break elif started: - # remove the prefix and optionally any leading - # whitespace. Our parsing logic relies on this. + # remove the prefix, if any. line = line[prefix_len:] - if lstrip: - line = line.lstrip() yield line_num, line return LineStream(lines=isolate_ktap_output(kernel_output)) @@ -300,10 +293,10 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: check_version(version_num, TAP_VERSIONS, 'TAP', test) else: return False - test.log.append(lines.pop()) + lines.pop() return True -TEST_HEADER = re.compile(r'^# Subtest: (.*)$') +TEST_HEADER = re.compile(r'^\s*# Subtest: (.*)$') def parse_test_header(lines: LineStream, test: Test) -> bool: """ @@ -323,11 +316,11 @@ def parse_test_header(lines: LineStream, test: Test) -> bool: match = TEST_HEADER.match(lines.peek()) if not match: return False - test.log.append(lines.pop()) test.name = match.group(1) + lines.pop() return True -TEST_PLAN = re.compile(r'1\.\.([0-9]+)') +TEST_PLAN = re.compile(r'^\s*1\.\.([0-9]+)') def parse_test_plan(lines: LineStream, test: Test) -> bool: """ @@ -350,14 +343,14 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: if not match: test.expected_count = None return False - test.log.append(lines.pop()) expected_count = int(match.group(1)) test.expected_count = expected_count + lines.pop() return True -TEST_RESULT = re.compile(r'^(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') +TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') -TEST_RESULT_SKIP = re.compile(r'^(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') +TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') def peek_test_name_match(lines: LineStream, test: Test) -> bool: """ @@ -414,7 +407,7 @@ def parse_test_result(lines: LineStream, test: Test, # Check if line matches test result line format if not match: return False - test.log.append(lines.pop()) + lines.pop() # Set name of test object if skip_match: @@ -446,6 +439,7 @@ def parse_diagnostic(lines: LineStream) -> List[str]: - '# Subtest: [test name]' - '[ok|not ok] [test number] [-] [test name] [optional skip directive]' + - 'KTAP version [version number]' Parameters: lines - LineStream of KTAP output to parse @@ -454,8 +448,9 @@ def parse_diagnostic(lines: LineStream) -> List[str]: Log of diagnostic lines """ log = [] # type: List[str] - while lines and not TEST_RESULT.match(lines.peek()) and not \ - TEST_HEADER.match(lines.peek()): + non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START] + while lines and not any(re.match(lines.peek()) + for re in non_diagnostic_lines): log.append(lines.pop()) return log @@ -501,17 +496,22 @@ def print_test_header(test: Test) -> None: test - Test object representing current test being printed """ message = test.name + if message != "": + # Add a leading space before the subtest counts only if a test name + # is provided using a "# Subtest" header line. + message += " " if test.expected_count: if test.expected_count == 1: - message += ' (1 subtest)' + message += '(1 subtest)' else: - message += f' ({test.expected_count} subtests)' + message += f'({test.expected_count} subtests)' stdout.print_with_timestamp(format_test_divider(message, len(message))) def print_log(log: Iterable[str]) -> None: """Prints all strings in saved log for test in yellow.""" - for m in log: - stdout.print_with_timestamp(stdout.yellow(m)) + formatted = textwrap.dedent('\n'.join(log)) + for line in formatted.splitlines(): + stdout.print_with_timestamp(stdout.yellow(line)) def format_test_result(test: Test) -> str: """ @@ -565,6 +565,40 @@ def print_test_footer(test: Test) -> None: stdout.print_with_timestamp(format_test_divider(message, len(message) - stdout.color_len())) + + +def _summarize_failed_tests(test: Test) -> str: + """Tries to summarize all the failing subtests in `test`.""" + + def failed_names(test: Test, parent_name: str) -> List[str]: + # Note: we use 'main' internally for the top-level test. + if not parent_name or parent_name == 'main': + full_name = test.name + else: + full_name = parent_name + '.' + test.name + + if not test.subtests: # this is a leaf node + return [full_name] + + # If all the children failed, just say this subtest failed. + # Don't summarize it down "the top-level test failed", though. + failed_subtests = [sub for sub in test.subtests if not sub.ok_status()] + if parent_name and len(failed_subtests) == len(test.subtests): + return [full_name] + + all_failures = [] # type: List[str] + for t in failed_subtests: + all_failures.extend(failed_names(t, full_name)) + return all_failures + + failures = failed_names(test, '') + # If there are too many failures, printing them out will just be noisy. + if len(failures) > 10: # this is an arbitrary limit + return '' + + return 'Failures: ' + ', '.join(failures) + + def print_summary_line(test: Test) -> None: """ Prints summary line of test object. Color of line is dependent on @@ -587,6 +621,15 @@ def print_summary_line(test: Test) -> None: color = stdout.red stdout.print_with_timestamp(color(f'Testing complete. {test.counts}')) + # Summarize failures that might have gone off-screen since we had a lot + # of tests (arbitrarily defined as >=100 for now). + if test.ok_status() or test.counts.total() < 100: + return + summarized = _summarize_failed_tests(test) + if not summarized: + return + stdout.print_with_timestamp(color(summarized)) + # Other methods: def bubble_up_test_results(test: Test) -> None: @@ -609,7 +652,7 @@ def bubble_up_test_results(test: Test) -> None: elif test.counts.get_status() == TestStatus.TEST_CRASHED: test.status = TestStatus.TEST_CRASHED -def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: +def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool) -> Test: """ Finds next test to parse in LineStream, creates new Test object, parses any subtests of the test, populates Test object with all @@ -627,15 +670,32 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: 1..4 [subtests] - - Subtest header line + - Subtest header (must include either the KTAP version line or + "# Subtest" header line) - Example: + Example (preferred format with both KTAP version line and + "# Subtest" line): + KTAP version 1 # Subtest: name 1..3 [subtests] ok 1 name + Example (only "# Subtest" line): + + # Subtest: name + 1..3 + [subtests] + ok 1 name + + Example (only KTAP version line, compliant with KTAP v1 spec): + + KTAP version 1 + 1..3 + [subtests] + ok 1 name + - Test result line Example: @@ -647,28 +707,29 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: expected_num - expected test number for test to be parsed log - list of strings containing any preceding diagnostic lines corresponding to the current test + is_subtest - boolean indicating whether test is a subtest Return: Test object populated with characteristics and any subtests """ test = Test() test.log.extend(log) - parent_test = False - main = parse_ktap_header(lines, test) - if main: - # If KTAP/TAP header is found, attempt to parse + if not is_subtest: + # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" + ktap_line = parse_ktap_header(lines, test) parse_test_plan(lines, test) parent_test = True else: - # If KTAP/TAP header is not found, test must be subtest - # header or test result line so parse attempt to parser - # subtest header - parent_test = parse_test_header(lines, test) + # If not the main test, attempt to parse a test header containing + # the KTAP version line and/or subtest header line + ktap_line = parse_ktap_header(lines, test) + subtest_line = parse_test_header(lines, test) + parent_test = (ktap_line or subtest_line) if parent_test: - # If subtest header is found, attempt to parse - # test plan and print header + # If KTAP version line and/or subtest header is found, attempt + # to parse test plan and print test header parse_test_plan(lines, test) print_test_header(test) expected_count = test.expected_count @@ -683,7 +744,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: sub_log = parse_diagnostic(lines) sub_test = Test() if not lines or (peek_test_name_match(lines, test) and - not main): + is_subtest): if expected_count and test_num <= expected_count: # If parser reaches end of test before # parsing expected number of subtests, print @@ -697,20 +758,19 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: test.log.extend(sub_log) break else: - sub_test = parse_test(lines, test_num, sub_log) + sub_test = parse_test(lines, test_num, sub_log, True) subtests.append(sub_test) test_num += 1 test.subtests = subtests - if not main: + if is_subtest: # If not main test, look for test result line test.log.extend(parse_diagnostic(lines)) - if (parent_test and peek_test_name_match(lines, test)) or \ - not parent_test: - parse_test_result(lines, test, expected_num) - else: + if test.name != "" and not peek_test_name_match(lines, test): test.add_error('missing subtest result line!') + else: + parse_test_result(lines, test, expected_num) - # Check for there being no tests + # Check for there being no subtests within parent test if parent_test and len(subtests) == 0: # Don't override a bad status if this test had one reported. # Assumption: no subtests means CRASHED is from Test.__init__() @@ -720,11 +780,11 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) - if parent_test and not main: + if parent_test and is_subtest: # If test has subtests and is not the main test object, print # footer. print_test_footer(test) - elif not main: + elif is_subtest: print_test_result(test) return test @@ -744,10 +804,10 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test: test = Test() if not lines: test.name = '<missing>' - test.add_error('could not find any KTAP output!') + test.add_error('Could not find any KTAP output. Did any KUnit tests run?') test.status = TestStatus.FAILURE_TO_PARSE_TESTS else: - test = parse_test(lines, 0, []) + test = parse_test(lines, 0, [], False) if test.status != TestStatus.NO_TESTS: test.status = test.counts.get_status() stdout.print_with_timestamp(DIVIDER) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index e2cd2cc2e98f..0c2190514103 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -80,6 +80,13 @@ class KconfigTest(unittest.TestCase): self.assertEqual(actual_kconfig, expected_kconfig) class KUnitParserTest(unittest.TestCase): + def setUp(self): + self.print_mock = mock.patch('kunit_printer.Printer.print').start() + self.addCleanup(mock.patch.stopall) + + def noPrintCallContains(self, substr: str): + for call in self.print_mock.mock_calls: + self.assertNotIn(substr, call.args[0]) def assertContains(self, needle: str, haystack: kunit_parser.LineStream): # Clone the iterator so we can print the contents on failure. @@ -133,33 +140,29 @@ class KUnitParserTest(unittest.TestCase): all_passed_log = test_data_path('test_is_test_passed-all_passed.log') with open(all_passed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts.errors, 0) def test_parse_successful_nested_tests_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log') with open(all_passed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts.errors, 0) def test_kselftest_nested(self): kselftest_log = test_data_path('test_is_test_passed-kselftest.log') with open(kselftest_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts.errors, 0) def test_parse_failed_test_log(self): failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.FAILURE, - result.status) + self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) + self.assertEqual(result.counts.errors, 0) def test_no_header(self): empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') @@ -167,9 +170,8 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) self.assertEqual(0, len(result.subtests)) - self.assertEqual( - kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, - result.status) + self.assertEqual(kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) + self.assertEqual(result.counts.errors, 1) def test_missing_test_plan(self): missing_plan_log = test_data_path('test_is_test_passed-' @@ -179,12 +181,8 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.extract_tap_lines( file.readlines())) # A missing test plan is not an error. - self.assertEqual(0, result.counts.errors) - # All tests should be accounted for. - self.assertEqual(10, result.counts.total()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=0)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) def test_no_tests(self): header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') @@ -192,9 +190,8 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) self.assertEqual(0, len(result.subtests)) - self.assertEqual( - kunit_parser.TestStatus.NO_TESTS, - result.status) + self.assertEqual(kunit_parser.TestStatus.NO_TESTS, result.status) + self.assertEqual(result.counts.errors, 1) def test_no_tests_no_plan(self): no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') @@ -205,7 +202,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.subtests[0].subtests[0].status) - self.assertEqual(1, result.counts.errors) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=1)) def test_no_kunit_output(self): @@ -214,9 +211,10 @@ class KUnitParserTest(unittest.TestCase): with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('could not find any KTAP output!')) + print_mock.assert_any_call(StrContains('Could not find any KTAP output.')) print_mock.stop() self.assertEqual(0, len(result.subtests)) + self.assertEqual(result.counts.errors, 1) def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') @@ -224,18 +222,16 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests(file.readlines()) # A skipped test does not fail the whole suite. - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=4, skipped=1)) def test_skipped_all_tests(self): skipped_log = test_data_path('test_skip_all_tests.log') with open(skipped_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SKIPPED, - result.status) + self.assertEqual(kunit_parser.TestStatus.SKIPPED, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(skipped=5)) def test_ignores_hyphen(self): hyphen_log = test_data_path('test_strip_hyphen.log') @@ -243,71 +239,112 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests(file.readlines()) # A skipped test does not fail the whole suite. - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual( "sysctl_test", result.subtests[0].name) self.assertEqual( "example", result.subtests[1].name) - file.close() - def test_ignores_prefix_printk_time(self): prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.TEST_CRASHED, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertGreaterEqual(result.counts.errors, 1) def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) + + def test_summarize_failures(self): + output = """ + KTAP version 1 + 1..2 + # Subtest: all_failed_suite + 1..2 + not ok 1 - test1 + not ok 2 - test2 + not ok 1 - all_failed_suite + # Subtest: some_failed_suite + 1..2 + ok 1 - test1 + not ok 2 - test2 + not ok 1 - some_failed_suite + """ + result = kunit_parser.parse_run_tests(output.splitlines()) + self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) + + self.assertEqual(kunit_parser._summarize_failed_tests(result), + 'Failures: all_failed_suite, some_failed_suite.test2') + + def test_ktap_format(self): + ktap_log = test_data_path('test_parse_ktap_output.log') + with open(ktap_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=3)) + self.assertEqual('suite', result.subtests[0].name) + self.assertEqual('case_1', result.subtests[0].subtests[0].name) + self.assertEqual('case_2', result.subtests[0].subtests[1].name) + + def test_parse_subtest_header(self): + ktap_log = test_data_path('test_parse_subtest_header.log') + with open(ktap_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.print_mock.assert_any_call(StrContains('suite (1 subtest)')) + + def test_show_test_output_on_failure(self): + output = """ + KTAP version 1 + 1..1 + Test output. + Indented more. + not ok 1 test1 + """ + result = kunit_parser.parse_run_tests(output.splitlines()) + self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) + + self.print_mock.assert_any_call(StrContains('Test output.')) + self.print_mock.assert_any_call(StrContains(' Indented more.')) + self.noPrintCallContains('not ok 1 test1') def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) @@ -485,6 +522,9 @@ class LinuxSourceTreeTest(unittest.TestCase): class KUnitJsonTest(unittest.TestCase): + def setUp(self): + self.print_mock = mock.patch('kunit_printer.Printer.print').start() + self.addCleanup(mock.patch.stopall) def _json_for(self, log_file): with open(test_data_path(log_file)) as file: @@ -581,7 +621,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(e.exception.code, 1) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) - self.print_mock.assert_any_call(StrContains('could not find any KTAP output!')) + self.print_mock.assert_any_call(StrContains('Could not find any KTAP output.')) def test_exec_no_tests(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0']) diff --git a/tools/testing/kunit/test_data/test_parse_ktap_output.log b/tools/testing/kunit/test_data/test_parse_ktap_output.log new file mode 100644 index 000000000000..ccdf244e5303 --- /dev/null +++ b/tools/testing/kunit/test_data/test_parse_ktap_output.log @@ -0,0 +1,8 @@ +KTAP version 1 +1..1 + KTAP version 1 + 1..3 + ok 1 case_1 + ok 2 case_2 + ok 3 case_3 +ok 1 suite diff --git a/tools/testing/kunit/test_data/test_parse_subtest_header.log b/tools/testing/kunit/test_data/test_parse_subtest_header.log new file mode 100644 index 000000000000..216631092e7b --- /dev/null +++ b/tools/testing/kunit/test_data/test_parse_subtest_header.log @@ -0,0 +1,7 @@ +KTAP version 1 +1..1 + KTAP version 1 + # Subtest: suite + 1..1 + ok 1 test +ok 1 suite
\ No newline at end of file diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 246f7ac8489b..2310ac4d080e 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \ LDFLAGS += -fsanitize=address -fsanitize=undefined TARGETS = main TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \ - tests/basic_api.o tests/common.o + tests/basic_api.o tests/common.o tests/alloc_exact_nid_api.o DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o OFILES = main.o $(DEP_OFILES) $(TEST_OFILES) EXTR_SRC = ../../../mm/memblock.c diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO index 33044c634ea7..e306c90c535f 100644 --- a/tools/testing/memblock/TODO +++ b/tools/testing/memblock/TODO @@ -1,17 +1,5 @@ TODO ===== -1. Add tests trying to memblock_add() or memblock_reserve() 129th region. - This will trigger memblock_double_array(), make sure it succeeds. - *Important:* These tests require valid memory ranges, use dummy physical - memory block from common.c to implement them. It is also very - likely that the current MEM_SIZE won't be enough for these - test cases. Use realloc to adjust the size accordingly. - -2. Add test cases using this functions (implement them for both directions): - + memblock_alloc_raw() - + memblock_alloc_exact_nid_raw() - + memblock_alloc_try_nid_raw() - -3. Add tests for memblock_alloc_node() to check if the correct NUMA node is set +1. Add tests for memblock_alloc_node() to check if the correct NUMA node is set for the new region diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c index 4ca1024342b1..278f9dec5008 100644 --- a/tools/testing/memblock/main.c +++ b/tools/testing/memblock/main.c @@ -3,6 +3,7 @@ #include "tests/alloc_api.h" #include "tests/alloc_helpers_api.h" #include "tests/alloc_nid_api.h" +#include "tests/alloc_exact_nid_api.h" #include "tests/common.h" int main(int argc, char **argv) @@ -12,6 +13,7 @@ int main(int argc, char **argv) memblock_alloc_checks(); memblock_alloc_helpers_checks(); memblock_alloc_nid_checks(); + memblock_alloc_exact_nid_checks(); return 0; } diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c new file mode 100644 index 000000000000..6e14447da6e1 --- /dev/null +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c @@ -0,0 +1,1113 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include "alloc_exact_nid_api.h" +#include "alloc_nid_api.h" + +#define FUNC_NAME "memblock_alloc_exact_nid_raw" + +/* + * contains the fraction of MEM_SIZE contained in each node in basis point + * units (one hundredth of 1% or 1/10000) + */ +static const unsigned int node_fractions[] = { + 2500, /* 1/4 */ + 625, /* 1/16 */ + 1250, /* 1/8 */ + 1250, /* 1/8 */ + 625, /* 1/16 */ + 625, /* 1/16 */ + 2500, /* 1/4 */ + 625, /* 1/16 */ +}; + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+----------+ + * + * | +------------------+ +-----+ | + * | | reserved | | new | | + * +-----------+------------------+--------------+-----+----------+ + * + * Expect to allocate an aligned region at the end of the requested node. The + * region count and total size get updated. + */ +static int alloc_exact_nid_top_down_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[1]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + r1.size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------------------+-----------+--------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node_end - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+----------+ + * + + + * | +---------+ | + * | | rgn | | + * +----------+---------+-------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * | +-----+ | + * | | rgn | | + * +-----+-----+----------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the beginning of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+---------+ + * + * | +------------------+-----+ | + * | | reserved | new | | + * +-----------+------------------+-----+------------------------+ + * + * Expect to allocate an aligned region in the requested node that merges with + * the existing reserved region. The total size gets updated. + */ +static int alloc_exact_nid_bottom_up_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t total_size; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + total_size = size + r1.size; + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------+-----------+--------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region at the beginning + * of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), req_node_end); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+---------+ + * + + + * | +---------+ | + * | | rgn | | + * +----+---------+------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that starts at + * the beginning of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * |-----+ | + * | rgn | | + * +-----+----------------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that starts at + * the beginning of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size: + * + * | +-----+ | + * | | req | | + * +---+-----+----------------------------+ + * + * +---------+ + * | rgn | + * +---------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_small_node_generic_check(void) +{ + int nid_req = 1; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_2 * req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is fully reserved: + * + * | +---------+ | + * | |requested| | + * +--------------+---------+-------------+ + * + * | +---------+ | + * | | reserved| | + * +--------------+---------+-------------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_node_reserved_generic_check(void) +{ + int nid_req = 2; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(req_node->base, req_node->size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved and does not have enough contiguous memory for the + * allocated region: + * + * | +-----------------------+ | + * | | requested | | + * +-----------+-----------------------+----+ + * + * | +----------+ | + * | | reserved | | + * +-----------------+----------+-----------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_part_reserved_fail_generic_check(void) +{ + int nid_req = 4; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_2; + r1.base = req_node->base + (size / SZ_2); + r1.size = size; + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the second + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +--------------------------+---------+ | + * | | first node |requested| | + * +------+--------------------------+---------+----------------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_split_range_high_generic_check(void) +{ + int nid_req = 3; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = req_node->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node starts + * after max_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * | +----------+----...----+----------+ +-----------+ | + * | | min node | ... | max node | | requested | | + * +-----+----------+----...----+----------+--------+-----------+---+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_no_overlap_high_generic_check(void) +{ + int nid_req = 7; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size. + * Additionally, none of the nodes have enough memory to allocate the region: + * + * +-----------------------------------+ + * | new | + * +-----------------------------------+ + * |-------+-------+-------+-------+-------+-------+-------+-------| + * | node0 | node1 | node2 | node3 | node4 | node5 | node6 | node7 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_large_region_generic_check(void) +{ + int nid_req = 3; + void *allocated_ptr = NULL; + phys_addr_t size = MEM_SIZE / SZ_2; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_addr range when + * there are two reserved regions at the borders. The requested node starts at + * min_addr and ends at max_addr and is the same size as the region to be + * allocated: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------+-----------------------+-----------------------| + * | | node5 | requested | node7 | + * +------+-----------+-----------------------+-----------------------+ + * + + + * | +----+-----------------------+----+ | + * | | r2 | new | r1 | | + * +-------------+----+-----------------------+----+------------------+ + * + * Expect to merge all of the regions into one. The region counter and total + * size fields get updated. + */ +static int alloc_exact_nid_numa_reserved_full_merge_generic_check(void) +{ + int nid_req = 6; + int nid_next = nid_req + 1; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *next_node = &memblock.memory.regions[nid_next]; + void *allocated_ptr = NULL; + struct region r1, r2; + phys_addr_t size = req_node->size; + phys_addr_t total_size; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r1.base = next_node->base; + r1.size = SZ_128; + + r2.size = SZ_128; + r2.base = r1.base - (size + r2.size); + + total_size = r1.size + r2.size + size; + min_addr = r2.base + r2.size; + max_addr = r1.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, r2.base); + + ASSERT_LE(new_rgn->base, req_node->base); + ASSERT_LE(region_end(req_node), region_end(new_rgn)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range, + * where the total range can fit the region, but it is split between two nodes + * and everything else is reserved. Additionally, nid is set to NUMA_NO_NODE + * instead of requesting a specific node: + * + * +-----------+ + * | new | + * +-----------+ + * | +---------------------+-----------| + * | | prev node | next node | + * +------+---------------------+-----------+ + * + + + * |----------------------+ +-----| + * | r1 | | r2 | + * +----------------------+-----------+-----+ + * ^ ^ + * | | + * | max_addr + * | + * min_addr + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_split_all_reserved_generic_check(void) +{ + void *allocated_ptr = NULL; + struct memblock_region *next_node = &memblock.memory.regions[7]; + struct region r1, r2; + phys_addr_t size = SZ_256; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r2.base = next_node->base + SZ_128; + r2.size = memblock_end_of_DRAM() - r2.base; + + r1.size = MEM_SIZE - (r2.size + size); + r1.base = memblock_start_of_DRAM(); + + min_addr = r1.base + r1.size; + max_addr = r2.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* Test case wrappers for NUMA tests */ +static int alloc_exact_nid_numa_simple_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_simple_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_simple_check(); + + return 0; +} + +static int alloc_exact_nid_numa_part_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_part_reserved_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_part_reserved_check(); + + return 0; +} + +static int alloc_exact_nid_numa_split_range_low_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_split_range_low_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_split_range_low_check(); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_split_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_no_overlap_split_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_no_overlap_split_check(); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_low_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_no_overlap_low_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_no_overlap_low_check(); + + return 0; +} + +static int alloc_exact_nid_numa_small_node_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_small_node_generic_check); + run_bottom_up(alloc_exact_nid_numa_small_node_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_node_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_node_reserved_generic_check); + run_bottom_up(alloc_exact_nid_numa_node_reserved_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_part_reserved_fail_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_part_reserved_fail_generic_check); + run_bottom_up(alloc_exact_nid_numa_part_reserved_fail_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_split_range_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_split_range_high_generic_check); + run_bottom_up(alloc_exact_nid_numa_split_range_high_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_no_overlap_high_generic_check); + run_bottom_up(alloc_exact_nid_numa_no_overlap_high_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_large_region_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_large_region_generic_check); + run_bottom_up(alloc_exact_nid_numa_large_region_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_reserved_full_merge_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_reserved_full_merge_generic_check); + run_bottom_up(alloc_exact_nid_numa_reserved_full_merge_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_split_all_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_split_all_reserved_generic_check); + run_bottom_up(alloc_exact_nid_numa_split_all_reserved_generic_check); + + return 0; +} + +int __memblock_alloc_exact_nid_numa_checks(void) +{ + test_print("Running %s NUMA tests...\n", FUNC_NAME); + + alloc_exact_nid_numa_simple_check(); + alloc_exact_nid_numa_part_reserved_check(); + alloc_exact_nid_numa_split_range_low_check(); + alloc_exact_nid_numa_no_overlap_split_check(); + alloc_exact_nid_numa_no_overlap_low_check(); + + alloc_exact_nid_numa_small_node_check(); + alloc_exact_nid_numa_node_reserved_check(); + alloc_exact_nid_numa_part_reserved_fail_check(); + alloc_exact_nid_numa_split_range_high_check(); + alloc_exact_nid_numa_no_overlap_high_check(); + alloc_exact_nid_numa_large_region_check(); + alloc_exact_nid_numa_reserved_full_merge_check(); + alloc_exact_nid_numa_split_all_reserved_check(); + + return 0; +} + +int memblock_alloc_exact_nid_checks(void) +{ + prefix_reset(); + prefix_push(FUNC_NAME); + + reset_memblock_attributes(); + dummy_physical_memory_init(); + + memblock_alloc_exact_nid_range_checks(); + memblock_alloc_exact_nid_numa_checks(); + + dummy_physical_memory_cleanup(); + + prefix_pop(); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.h b/tools/testing/memblock/tests/alloc_exact_nid_api.h new file mode 100644 index 000000000000..cef419d55d2a --- /dev/null +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _MEMBLOCK_ALLOC_EXACT_NID_H +#define _MEMBLOCK_ALLOC_EXACT_NID_H + +#include "common.h" + +int memblock_alloc_exact_nid_checks(void); +int __memblock_alloc_exact_nid_numa_checks(void); + +#ifdef CONFIG_NUMA +static inline int memblock_alloc_exact_nid_numa_checks(void) +{ + __memblock_alloc_exact_nid_numa_checks(); + return 0; +} + +#else +static inline int memblock_alloc_exact_nid_numa_checks(void) +{ + return 0; +} + +#endif /* CONFIG_NUMA */ + +#endif diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c index 2c2d60f4e3e3..49ef68cccd6f 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.c +++ b/tools/testing/memblock/tests/alloc_nid_api.c @@ -18,18 +18,29 @@ static const unsigned int node_fractions[] = { 625, /* 1/16 */ }; -static inline const char * const get_memblock_alloc_try_nid_name(int flags) +static inline const char * const get_memblock_alloc_nid_name(int flags) { + if (flags & TEST_F_EXACT) + return "memblock_alloc_exact_nid_raw"; if (flags & TEST_F_RAW) return "memblock_alloc_try_nid_raw"; return "memblock_alloc_try_nid"; } -static inline void *run_memblock_alloc_try_nid(phys_addr_t size, - phys_addr_t align, - phys_addr_t min_addr, - phys_addr_t max_addr, int nid) -{ +static inline void *run_memblock_alloc_nid(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr, + phys_addr_t max_addr, int nid) +{ + assert(!(alloc_nid_test_flags & TEST_F_EXACT) || + (alloc_nid_test_flags & TEST_F_RAW)); + /* + * TEST_F_EXACT should be checked before TEST_F_RAW since + * memblock_alloc_exact_nid_raw() performs raw allocations. + */ + if (alloc_nid_test_flags & TEST_F_EXACT) + return memblock_alloc_exact_nid_raw(size, align, min_addr, + max_addr, nid); if (alloc_nid_test_flags & TEST_F_RAW) return memblock_alloc_try_nid_raw(size, align, min_addr, max_addr, nid); @@ -50,7 +61,7 @@ static inline void *run_memblock_alloc_try_nid(phys_addr_t size, * * Expect to allocate a region that ends at max_addr. */ -static int alloc_try_nid_top_down_simple_check(void) +static int alloc_nid_top_down_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -65,9 +76,9 @@ static int alloc_try_nid_top_down_simple_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -102,7 +113,7 @@ static int alloc_try_nid_top_down_simple_check(void) * * Expect to allocate an aligned region that ends before max_addr. */ -static int alloc_try_nid_top_down_end_misaligned_check(void) +static int alloc_nid_top_down_end_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -118,9 +129,9 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512 + misalign; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -153,7 +164,7 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) * Expect to allocate a region that starts at min_addr and ends at * max_addr, given that min_addr is aligned. */ -static int alloc_try_nid_exact_address_generic_check(void) +static int alloc_nid_exact_address_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -168,9 +179,9 @@ static int alloc_try_nid_exact_address_generic_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -205,7 +216,7 @@ static int alloc_try_nid_exact_address_generic_check(void) * Expect to drop the lower limit and allocate a memory region which * ends at max_addr (if the address is aligned). */ -static int alloc_try_nid_top_down_narrow_range_check(void) +static int alloc_nid_top_down_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -219,9 +230,9 @@ static int alloc_try_nid_top_down_narrow_range_check(void) min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -257,7 +268,7 @@ static int alloc_try_nid_top_down_narrow_range_check(void) * * Expect no allocation to happen. */ -static int alloc_try_nid_low_max_generic_check(void) +static int alloc_nid_low_max_generic_check(void) { void *allocated_ptr = NULL; phys_addr_t size = SZ_1K; @@ -270,9 +281,9 @@ static int alloc_try_nid_low_max_generic_check(void) min_addr = memblock_start_of_DRAM(); max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -295,7 +306,7 @@ static int alloc_try_nid_low_max_generic_check(void) * * Expect a merge of both regions. Only the region size gets updated. */ -static int alloc_try_nid_min_reserved_generic_check(void) +static int alloc_nid_min_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -315,9 +326,9 @@ static int alloc_try_nid_min_reserved_generic_check(void) memblock_reserve(reserved_base, r1_size); - allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); @@ -347,7 +358,7 @@ static int alloc_try_nid_min_reserved_generic_check(void) * * Expect a merge of regions. Only the region size gets updated. */ -static int alloc_try_nid_max_reserved_generic_check(void) +static int alloc_nid_max_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -365,9 +376,9 @@ static int alloc_try_nid_max_reserved_generic_check(void) memblock_reserve(max_addr, r1_size); - allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); @@ -400,7 +411,7 @@ static int alloc_try_nid_max_reserved_generic_check(void) * updated. The total size field gets updated. */ -static int alloc_try_nid_top_down_reserved_with_space_check(void) +static int alloc_nid_top_down_reserved_with_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -428,9 +439,9 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -465,7 +476,7 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) * Expect to merge all of the regions into one. The region counter and total * size fields get updated. */ -static int alloc_try_nid_reserved_full_merge_generic_check(void) +static int alloc_nid_reserved_full_merge_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -491,9 +502,9 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -527,7 +538,7 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) * Expect to merge the new region with r2. The second region does not get * updated. The total size counter gets updated. */ -static int alloc_try_nid_top_down_reserved_no_space_check(void) +static int alloc_nid_top_down_reserved_no_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -555,9 +566,9 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -596,7 +607,7 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) * Expect no allocation to happen. */ -static int alloc_try_nid_reserved_all_generic_check(void) +static int alloc_nid_reserved_all_generic_check(void) { void *allocated_ptr = NULL; struct region r1, r2; @@ -620,9 +631,9 @@ static int alloc_try_nid_reserved_all_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -636,7 +647,7 @@ static int alloc_try_nid_reserved_all_generic_check(void) * bigger than the end address of the available memory. Expect to allocate * a region that ends before the end of the memory. */ -static int alloc_try_nid_top_down_cap_max_check(void) +static int alloc_nid_top_down_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -650,9 +661,9 @@ static int alloc_try_nid_top_down_cap_max_check(void) min_addr = memblock_end_of_DRAM() - SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -673,7 +684,7 @@ static int alloc_try_nid_top_down_cap_max_check(void) * smaller than the start address of the available memory. Expect to allocate * a region that ends before the end of the memory. */ -static int alloc_try_nid_top_down_cap_min_check(void) +static int alloc_nid_top_down_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -687,9 +698,9 @@ static int alloc_try_nid_top_down_cap_min_check(void) min_addr = memblock_start_of_DRAM() - SZ_256; max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -719,7 +730,7 @@ static int alloc_try_nid_top_down_cap_min_check(void) * * Expect to allocate a region that ends before max_addr. */ -static int alloc_try_nid_bottom_up_simple_check(void) +static int alloc_nid_bottom_up_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -734,9 +745,9 @@ static int alloc_try_nid_bottom_up_simple_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -771,7 +782,7 @@ static int alloc_try_nid_bottom_up_simple_check(void) * * Expect to allocate an aligned region that ends before max_addr. */ -static int alloc_try_nid_bottom_up_start_misaligned_check(void) +static int alloc_nid_bottom_up_start_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -787,9 +798,9 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) min_addr = memblock_start_of_DRAM() + misalign; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -824,7 +835,7 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) * Expect to drop the lower limit and allocate a memory region which * starts at the beginning of the available memory. */ -static int alloc_try_nid_bottom_up_narrow_range_check(void) +static int alloc_nid_bottom_up_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -838,9 +849,9 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -873,7 +884,7 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) * updated. The total size field gets updated. */ -static int alloc_try_nid_bottom_up_reserved_with_space_check(void) +static int alloc_nid_bottom_up_reserved_with_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -901,9 +912,9 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -942,7 +953,7 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) * Other regions are not modified. */ -static int alloc_try_nid_bottom_up_reserved_no_space_check(void) +static int alloc_nid_bottom_up_reserved_no_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[2]; struct memblock_region *rgn2 = &memblock.reserved.regions[1]; @@ -971,9 +982,9 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -1000,7 +1011,7 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) * bigger than the end address of the available memory. Expect to allocate * a region that starts at the min_addr. */ -static int alloc_try_nid_bottom_up_cap_max_check(void) +static int alloc_nid_bottom_up_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -1014,9 +1025,9 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) min_addr = memblock_start_of_DRAM() + SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1037,7 +1048,7 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) * smaller than the start address of the available memory. Expect to allocate * a region at the beginning of the available memory. */ -static int alloc_try_nid_bottom_up_cap_min_check(void) +static int alloc_nid_bottom_up_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -1051,9 +1062,9 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM() - SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1070,133 +1081,133 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) } /* Test case wrappers for range tests */ -static int alloc_try_nid_simple_check(void) +static int alloc_nid_simple_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_simple_check(); + alloc_nid_top_down_simple_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_simple_check(); + alloc_nid_bottom_up_simple_check(); return 0; } -static int alloc_try_nid_misaligned_check(void) +static int alloc_nid_misaligned_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_end_misaligned_check(); + alloc_nid_top_down_end_misaligned_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_start_misaligned_check(); + alloc_nid_bottom_up_start_misaligned_check(); return 0; } -static int alloc_try_nid_narrow_range_check(void) +static int alloc_nid_narrow_range_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_narrow_range_check(); + alloc_nid_top_down_narrow_range_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_narrow_range_check(); + alloc_nid_bottom_up_narrow_range_check(); return 0; } -static int alloc_try_nid_reserved_with_space_check(void) +static int alloc_nid_reserved_with_space_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_reserved_with_space_check(); + alloc_nid_top_down_reserved_with_space_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_reserved_with_space_check(); + alloc_nid_bottom_up_reserved_with_space_check(); return 0; } -static int alloc_try_nid_reserved_no_space_check(void) +static int alloc_nid_reserved_no_space_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_reserved_no_space_check(); + alloc_nid_top_down_reserved_no_space_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_reserved_no_space_check(); + alloc_nid_bottom_up_reserved_no_space_check(); return 0; } -static int alloc_try_nid_cap_max_check(void) +static int alloc_nid_cap_max_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_cap_max_check(); + alloc_nid_top_down_cap_max_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_cap_max_check(); + alloc_nid_bottom_up_cap_max_check(); return 0; } -static int alloc_try_nid_cap_min_check(void) +static int alloc_nid_cap_min_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_cap_min_check(); + alloc_nid_top_down_cap_min_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_cap_min_check(); + alloc_nid_bottom_up_cap_min_check(); return 0; } -static int alloc_try_nid_min_reserved_check(void) +static int alloc_nid_min_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_min_reserved_generic_check); - run_bottom_up(alloc_try_nid_min_reserved_generic_check); + run_top_down(alloc_nid_min_reserved_generic_check); + run_bottom_up(alloc_nid_min_reserved_generic_check); return 0; } -static int alloc_try_nid_max_reserved_check(void) +static int alloc_nid_max_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_max_reserved_generic_check); - run_bottom_up(alloc_try_nid_max_reserved_generic_check); + run_top_down(alloc_nid_max_reserved_generic_check); + run_bottom_up(alloc_nid_max_reserved_generic_check); return 0; } -static int alloc_try_nid_exact_address_check(void) +static int alloc_nid_exact_address_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_exact_address_generic_check); - run_bottom_up(alloc_try_nid_exact_address_generic_check); + run_top_down(alloc_nid_exact_address_generic_check); + run_bottom_up(alloc_nid_exact_address_generic_check); return 0; } -static int alloc_try_nid_reserved_full_merge_check(void) +static int alloc_nid_reserved_full_merge_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_reserved_full_merge_generic_check); - run_bottom_up(alloc_try_nid_reserved_full_merge_generic_check); + run_top_down(alloc_nid_reserved_full_merge_generic_check); + run_bottom_up(alloc_nid_reserved_full_merge_generic_check); return 0; } -static int alloc_try_nid_reserved_all_check(void) +static int alloc_nid_reserved_all_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_reserved_all_generic_check); - run_bottom_up(alloc_try_nid_reserved_all_generic_check); + run_top_down(alloc_nid_reserved_all_generic_check); + run_bottom_up(alloc_nid_reserved_all_generic_check); return 0; } -static int alloc_try_nid_low_max_check(void) +static int alloc_nid_low_max_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_low_max_generic_check); - run_bottom_up(alloc_try_nid_low_max_generic_check); + run_top_down(alloc_nid_low_max_generic_check); + run_bottom_up(alloc_nid_low_max_generic_check); return 0; } @@ -1204,22 +1215,22 @@ static int alloc_try_nid_low_max_check(void) static int memblock_alloc_nid_range_checks(void) { test_print("Running %s range tests...\n", - get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); + get_memblock_alloc_nid_name(alloc_nid_test_flags)); - alloc_try_nid_simple_check(); - alloc_try_nid_misaligned_check(); - alloc_try_nid_narrow_range_check(); - alloc_try_nid_reserved_with_space_check(); - alloc_try_nid_reserved_no_space_check(); - alloc_try_nid_cap_max_check(); - alloc_try_nid_cap_min_check(); + alloc_nid_simple_check(); + alloc_nid_misaligned_check(); + alloc_nid_narrow_range_check(); + alloc_nid_reserved_with_space_check(); + alloc_nid_reserved_no_space_check(); + alloc_nid_cap_max_check(); + alloc_nid_cap_min_check(); - alloc_try_nid_min_reserved_check(); - alloc_try_nid_max_reserved_check(); - alloc_try_nid_exact_address_check(); - alloc_try_nid_reserved_full_merge_check(); - alloc_try_nid_reserved_all_check(); - alloc_try_nid_low_max_check(); + alloc_nid_min_reserved_check(); + alloc_nid_max_reserved_check(); + alloc_nid_exact_address_check(); + alloc_nid_reserved_full_merge_check(); + alloc_nid_reserved_all_check(); + alloc_nid_low_max_check(); return 0; } @@ -1229,7 +1240,7 @@ static int memblock_alloc_nid_range_checks(void) * has enough memory to allocate a region of the requested size. * Expect to allocate an aligned region at the end of the requested node. */ -static int alloc_try_nid_top_down_numa_simple_check(void) +static int alloc_nid_top_down_numa_simple_check(void) { int nid_req = 3; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1247,8 +1258,8 @@ static int alloc_try_nid_top_down_numa_simple_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1280,7 +1291,7 @@ static int alloc_try_nid_top_down_numa_simple_check(void) * Expect to allocate an aligned region at the end of the last node that has * enough memory (in this case, nid = 6) after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_small_node_check(void) +static int alloc_nid_top_down_numa_small_node_check(void) { int nid_req = 1; int nid_exp = 6; @@ -1299,8 +1310,8 @@ static int alloc_try_nid_top_down_numa_small_node_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1333,7 +1344,7 @@ static int alloc_try_nid_top_down_numa_small_node_check(void) * large enough and has enough unreserved memory (in this case, nid = 6) after * falling back to NUMA_NO_NODE. The region count and total size get updated. */ -static int alloc_try_nid_top_down_numa_node_reserved_check(void) +static int alloc_nid_top_down_numa_node_reserved_check(void) { int nid_req = 2; int nid_exp = 6; @@ -1353,8 +1364,8 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(req_node->base, req_node->size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1386,7 +1397,7 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void) * Expect to allocate an aligned region at the end of the requested node. The * region count and total size get updated. */ -static int alloc_try_nid_top_down_numa_part_reserved_check(void) +static int alloc_nid_top_down_numa_part_reserved_check(void) { int nid_req = 4; struct memblock_region *new_rgn = &memblock.reserved.regions[1]; @@ -1408,8 +1419,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1444,7 +1455,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void) * nid = NUMA_NODES - 1) after falling back to NUMA_NO_NODE. The region count * and total size get updated. */ -static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) +static int alloc_nid_top_down_numa_part_reserved_fallback_check(void) { int nid_req = 4; int nid_exp = NUMA_NODES - 1; @@ -1469,8 +1480,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1507,7 +1518,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) * Expect to drop the lower limit and allocate a memory region that ends at * the end of the requested node. */ -static int alloc_try_nid_top_down_numa_split_range_low_check(void) +static int alloc_nid_top_down_numa_split_range_low_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1525,8 +1536,8 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void) min_addr = req_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1563,7 +1574,7 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void) * Expect to drop the lower limit and allocate a memory region that * ends at the end of the first node that overlaps with the range. */ -static int alloc_try_nid_top_down_numa_split_range_high_check(void) +static int alloc_nid_top_down_numa_split_range_high_check(void) { int nid_req = 3; int nid_exp = nid_req - 1; @@ -1582,8 +1593,8 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void) min_addr = exp_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1620,7 +1631,7 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void) * Expect to drop the lower limit and allocate a memory region that ends at * the end of the requested node. */ -static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) +static int alloc_nid_top_down_numa_no_overlap_split_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1638,8 +1649,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) min_addr = node2->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1677,7 +1688,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) * Expect to allocate a memory region at the end of the final node in * the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) +static int alloc_nid_top_down_numa_no_overlap_low_check(void) { int nid_req = 0; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1694,8 +1705,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1733,7 +1744,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) * Expect to allocate a memory region at the end of the final node in * the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) +static int alloc_nid_top_down_numa_no_overlap_high_check(void) { int nid_req = 7; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1750,8 +1761,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1773,7 +1784,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) * has enough memory to allocate a region of the requested size. * Expect to allocate an aligned region at the beginning of the requested node. */ -static int alloc_try_nid_bottom_up_numa_simple_check(void) +static int alloc_nid_bottom_up_numa_simple_check(void) { int nid_req = 3; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1791,8 +1802,8 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1824,7 +1835,7 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void) * Expect to allocate an aligned region at the beginning of the first node that * has enough memory (in this case, nid = 0) after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_small_node_check(void) +static int alloc_nid_bottom_up_numa_small_node_check(void) { int nid_req = 1; int nid_exp = 0; @@ -1843,8 +1854,8 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1878,7 +1889,7 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void) * after falling back to NUMA_NO_NODE. The region count and total size get * updated. */ -static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) +static int alloc_nid_bottom_up_numa_node_reserved_check(void) { int nid_req = 2; int nid_exp = 0; @@ -1898,8 +1909,8 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(req_node->base, req_node->size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1931,7 +1942,7 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) * Expect to allocate an aligned region in the requested node that merges with * the existing reserved region. The total size gets updated. */ -static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) +static int alloc_nid_bottom_up_numa_part_reserved_check(void) { int nid_req = 4; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1955,8 +1966,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) total_size = size + r1.size; memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1991,7 +2002,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) * nid = 0) after falling back to NUMA_NO_NODE. The region count and total size * get updated. */ -static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) +static int alloc_nid_bottom_up_numa_part_reserved_fallback_check(void) { int nid_req = 4; int nid_exp = 0; @@ -2016,8 +2027,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2054,7 +2065,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) * Expect to drop the lower limit and allocate a memory region at the beginning * of the requested node. */ -static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) +static int alloc_nid_bottom_up_numa_split_range_low_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2072,8 +2083,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) min_addr = req_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2110,7 +2121,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) * Expect to drop the lower limit and allocate a memory region at the beginning * of the first node that has enough memory. */ -static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) +static int alloc_nid_bottom_up_numa_split_range_high_check(void) { int nid_req = 3; int nid_exp = 0; @@ -2130,8 +2141,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) min_addr = req_node->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2168,7 +2179,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) * Expect to drop the lower limit and allocate a memory region that starts at * the beginning of the requested node. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_split_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2186,8 +2197,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) min_addr = node2->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2225,7 +2236,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) * Expect to allocate a memory region at the beginning of the first node * in the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_low_check(void) { int nid_req = 0; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2242,8 +2253,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2281,7 +2292,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) * Expect to allocate a memory region at the beginning of the first node * in the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_high_check(void) { int nid_req = 7; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2298,8 +2309,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2330,7 +2341,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) * * Expect no allocation to happen. */ -static int alloc_try_nid_numa_large_region_generic_check(void) +static int alloc_nid_numa_large_region_generic_check(void) { int nid_req = 3; void *allocated_ptr = NULL; @@ -2344,8 +2355,8 @@ static int alloc_try_nid_numa_large_region_generic_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_EQ(allocated_ptr, NULL); test_pass_pop(); @@ -2374,7 +2385,7 @@ static int alloc_try_nid_numa_large_region_generic_check(void) * Expect to merge all of the regions into one. The region counter and total * size fields get updated. */ -static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) +static int alloc_nid_numa_reserved_full_merge_generic_check(void) { int nid_req = 6; int nid_next = nid_req + 1; @@ -2404,8 +2415,8 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2448,7 +2459,7 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) * * Expect no allocation to happen. */ -static int alloc_try_nid_numa_split_all_reserved_generic_check(void) +static int alloc_nid_numa_split_all_reserved_generic_check(void) { void *allocated_ptr = NULL; struct memblock_region *next_node = &memblock.memory.regions[7]; @@ -2472,9 +2483,9 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -2484,139 +2495,139 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void) } /* Test case wrappers for NUMA tests */ -static int alloc_try_nid_numa_simple_check(void) +static int alloc_nid_numa_simple_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_simple_check(); + alloc_nid_top_down_numa_simple_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_simple_check(); + alloc_nid_bottom_up_numa_simple_check(); return 0; } -static int alloc_try_nid_numa_small_node_check(void) +static int alloc_nid_numa_small_node_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_small_node_check(); + alloc_nid_top_down_numa_small_node_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_small_node_check(); + alloc_nid_bottom_up_numa_small_node_check(); return 0; } -static int alloc_try_nid_numa_node_reserved_check(void) +static int alloc_nid_numa_node_reserved_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_node_reserved_check(); + alloc_nid_top_down_numa_node_reserved_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_node_reserved_check(); + alloc_nid_bottom_up_numa_node_reserved_check(); return 0; } -static int alloc_try_nid_numa_part_reserved_check(void) +static int alloc_nid_numa_part_reserved_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_part_reserved_check(); + alloc_nid_top_down_numa_part_reserved_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_part_reserved_check(); + alloc_nid_bottom_up_numa_part_reserved_check(); return 0; } -static int alloc_try_nid_numa_part_reserved_fallback_check(void) +static int alloc_nid_numa_part_reserved_fallback_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_part_reserved_fallback_check(); + alloc_nid_top_down_numa_part_reserved_fallback_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(); + alloc_nid_bottom_up_numa_part_reserved_fallback_check(); return 0; } -static int alloc_try_nid_numa_split_range_low_check(void) +static int alloc_nid_numa_split_range_low_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_split_range_low_check(); + alloc_nid_top_down_numa_split_range_low_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_split_range_low_check(); + alloc_nid_bottom_up_numa_split_range_low_check(); return 0; } -static int alloc_try_nid_numa_split_range_high_check(void) +static int alloc_nid_numa_split_range_high_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_split_range_high_check(); + alloc_nid_top_down_numa_split_range_high_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_split_range_high_check(); + alloc_nid_bottom_up_numa_split_range_high_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_split_check(void) +static int alloc_nid_numa_no_overlap_split_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_split_check(); + alloc_nid_top_down_numa_no_overlap_split_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_split_check(); + alloc_nid_bottom_up_numa_no_overlap_split_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_low_check(void) +static int alloc_nid_numa_no_overlap_low_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_low_check(); + alloc_nid_top_down_numa_no_overlap_low_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_low_check(); + alloc_nid_bottom_up_numa_no_overlap_low_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_high_check(void) +static int alloc_nid_numa_no_overlap_high_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_high_check(); + alloc_nid_top_down_numa_no_overlap_high_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_high_check(); + alloc_nid_bottom_up_numa_no_overlap_high_check(); return 0; } -static int alloc_try_nid_numa_large_region_check(void) +static int alloc_nid_numa_large_region_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_large_region_generic_check); - run_bottom_up(alloc_try_nid_numa_large_region_generic_check); + run_top_down(alloc_nid_numa_large_region_generic_check); + run_bottom_up(alloc_nid_numa_large_region_generic_check); return 0; } -static int alloc_try_nid_numa_reserved_full_merge_check(void) +static int alloc_nid_numa_reserved_full_merge_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_reserved_full_merge_generic_check); - run_bottom_up(alloc_try_nid_numa_reserved_full_merge_generic_check); + run_top_down(alloc_nid_numa_reserved_full_merge_generic_check); + run_bottom_up(alloc_nid_numa_reserved_full_merge_generic_check); return 0; } -static int alloc_try_nid_numa_split_all_reserved_check(void) +static int alloc_nid_numa_split_all_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_split_all_reserved_generic_check); - run_bottom_up(alloc_try_nid_numa_split_all_reserved_generic_check); + run_top_down(alloc_nid_numa_split_all_reserved_generic_check); + run_bottom_up(alloc_nid_numa_split_all_reserved_generic_check); return 0; } @@ -2624,22 +2635,22 @@ static int alloc_try_nid_numa_split_all_reserved_check(void) int __memblock_alloc_nid_numa_checks(void) { test_print("Running %s NUMA tests...\n", - get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); + get_memblock_alloc_nid_name(alloc_nid_test_flags)); - alloc_try_nid_numa_simple_check(); - alloc_try_nid_numa_small_node_check(); - alloc_try_nid_numa_node_reserved_check(); - alloc_try_nid_numa_part_reserved_check(); - alloc_try_nid_numa_part_reserved_fallback_check(); - alloc_try_nid_numa_split_range_low_check(); - alloc_try_nid_numa_split_range_high_check(); + alloc_nid_numa_simple_check(); + alloc_nid_numa_small_node_check(); + alloc_nid_numa_node_reserved_check(); + alloc_nid_numa_part_reserved_check(); + alloc_nid_numa_part_reserved_fallback_check(); + alloc_nid_numa_split_range_low_check(); + alloc_nid_numa_split_range_high_check(); - alloc_try_nid_numa_no_overlap_split_check(); - alloc_try_nid_numa_no_overlap_low_check(); - alloc_try_nid_numa_no_overlap_high_check(); - alloc_try_nid_numa_large_region_check(); - alloc_try_nid_numa_reserved_full_merge_check(); - alloc_try_nid_numa_split_all_reserved_check(); + alloc_nid_numa_no_overlap_split_check(); + alloc_nid_numa_no_overlap_low_check(); + alloc_nid_numa_no_overlap_high_check(); + alloc_nid_numa_large_region_check(); + alloc_nid_numa_reserved_full_merge_check(); + alloc_nid_numa_split_all_reserved_check(); return 0; } @@ -2649,7 +2660,7 @@ static int memblock_alloc_nid_checks_internal(int flags) alloc_nid_test_flags = flags; prefix_reset(); - prefix_push(get_memblock_alloc_try_nid_name(flags)); + prefix_push(get_memblock_alloc_nid_name(flags)); reset_memblock_attributes(); dummy_physical_memory_init(); @@ -2671,3 +2682,12 @@ int memblock_alloc_nid_checks(void) return 0; } + +int memblock_alloc_exact_nid_range_checks(void) +{ + alloc_nid_test_flags = (TEST_F_RAW | TEST_F_EXACT); + + memblock_alloc_nid_range_checks(); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_nid_api.h b/tools/testing/memblock/tests/alloc_nid_api.h index 92d07d230e18..2b8cabacacb8 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.h +++ b/tools/testing/memblock/tests/alloc_nid_api.h @@ -5,6 +5,7 @@ #include "common.h" int memblock_alloc_nid_checks(void); +int memblock_alloc_exact_nid_range_checks(void); int __memblock_alloc_nid_numa_checks(void); #ifdef CONFIG_NUMA diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index a13a57ba0815..411647094cc3 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -423,6 +423,98 @@ static int memblock_add_near_max_check(void) return 0; } +/* + * A test that trying to add the 129th memory block. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as + * memory.regions. + */ +static int memblock_add_many_check(void) +{ + int i; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t new_memory_regions_size; + phys_addr_t base, size = SZ_64; + phys_addr_t gap_size = SZ_64; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + /* + * We allocated enough memory by using dummy_physical_memory_init(), and + * split it into small block. First we split a large enough memory block + * as the memory region which will be choosed by memblock_double_array(). + */ + base = PAGE_ALIGN(dummy_physical_memory_base()); + new_memory_regions_size = PAGE_ALIGN(INIT_MEMBLOCK_REGIONS * 2 * + sizeof(struct memblock_region)); + memblock_add(base, new_memory_regions_size); + + /* This is the base of small memory block. */ + base += new_memory_regions_size + gap_size; + + orig_region = memblock.memory.regions; + + for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) { + /* + * Add these small block to fulfill the memblock. We keep a + * gap between the nearby memory to avoid being merged. + */ + memblock_add(base, size); + base += size + gap_size; + + ASSERT_EQ(memblock.memory.cnt, i + 2); + ASSERT_EQ(memblock.memory.total_size, new_memory_regions_size + + (i + 1) * size); + } + + /* + * At there, memblock_double_array() has been succeed, check if it + * update the memory.max. + */ + ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2); + + /* memblock_double_array() will reserve the memory it used. Check it. */ + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, new_memory_regions_size); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_add() still works as normal. + */ + memblock_add(r.base, r.size); + ASSERT_EQ(memblock.memory.regions[0].base, r.base); + ASSERT_EQ(memblock.memory.regions[0].size, r.size); + + ASSERT_EQ(memblock.memory.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.memory.total_size, INIT_MEMBLOCK_REGIONS * size + + new_memory_regions_size + + r.size); + ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current memory.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.memory.regions = orig_region; + memblock.memory.cnt = INIT_MEMBLOCK_REGIONS; + + test_pass_pop(); + + return 0; +} + static int memblock_add_checks(void) { prefix_reset(); @@ -438,6 +530,7 @@ static int memblock_add_checks(void) memblock_add_twice_check(); memblock_add_between_check(); memblock_add_near_max_check(); + memblock_add_many_check(); prefix_pop(); @@ -799,6 +892,96 @@ static int memblock_reserve_near_max_check(void) return 0; } +/* + * A test that trying to reserve the 129th memory block. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as + * reserved.regions. + */ +static int memblock_reserve_many_check(void) +{ + int i; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t memory_base = SZ_128K; + phys_addr_t new_reserved_regions_size; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + /* Add a valid memory region used by double_array(). */ + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + + for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) { + /* Reserve some fakes memory region to fulfill the memblock. */ + memblock_reserve(memory_base, MEM_SIZE); + + ASSERT_EQ(memblock.reserved.cnt, i + 1); + ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE); + + /* Keep the gap so these memory region will not be merged. */ + memory_base += MEM_SIZE * 2; + } + + orig_region = memblock.reserved.regions; + + /* This reserve the 129 memory_region, and makes it double array. */ + memblock_reserve(memory_base, MEM_SIZE); + + /* + * This is the memory region size used by the doubled reserved.regions, + * and it has been reserved due to it has been used. The size is used to + * calculate the total_size that the memblock.reserved have now. + */ + new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) * + sizeof(struct memblock_region)); + /* + * The double_array() will find a free memory region as the new + * reserved.regions, and the used memory region will be reserved, so + * there will be one more region exist in the reserved memblock. And the + * one more reserved region's size is new_reserved_regions_size. + */ + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_reserve() still works as normal. + */ + memblock_reserve(r.base, r.size); + ASSERT_EQ(memblock.reserved.regions[0].base, r.base); + ASSERT_EQ(memblock.reserved.regions[0].size, r.size); + + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size + + r.size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + static int memblock_reserve_checks(void) { prefix_reset(); @@ -813,6 +996,7 @@ static int memblock_reserve_checks(void) memblock_reserve_twice_check(); memblock_reserve_between_check(); memblock_reserve_near_max_check(); + memblock_reserve_many_check(); prefix_pop(); diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index 3f795047bbe1..f43b6f414983 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -5,8 +5,6 @@ #include <linux/memory_hotplug.h> #include <linux/build_bug.h> -#define INIT_MEMBLOCK_REGIONS 128 -#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS #define PREFIXES_MAX 15 #define DELIM ": " #define BASIS 10000 @@ -115,6 +113,11 @@ void dummy_physical_memory_cleanup(void) free(memory_block.base); } +phys_addr_t dummy_physical_memory_base(void) +{ + return (phys_addr_t)memory_block.base; +} + static void usage(const char *prog) { BUILD_BUG_ON(ARRAY_SIZE(help_opts) != ARRAY_SIZE(long_opts) - 1); diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index d6bbbe63bfc3..4f23302ee677 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -10,14 +10,19 @@ #include <linux/printk.h> #include <../selftests/kselftest.h> -#define MEM_SIZE SZ_16K +#define MEM_SIZE SZ_32K #define NUMA_NODES 8 +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS + enum test_flags { /* No special request. */ TEST_F_NONE = 0x0, /* Perform raw allocations (no zeroing of memory). */ TEST_F_RAW = 0x1, + /* Perform allocations on the exact node specified. */ + TEST_F_EXACT = 0x2 }; /** @@ -124,6 +129,7 @@ void setup_memblock(void); void setup_numa_memblock(const unsigned int node_fracs[]); void dummy_physical_memory_init(void); void dummy_physical_memory_cleanup(void); +phys_addr_t dummy_physical_memory_base(void); void parse_args(int argc, char **argv); void test_fail(void); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 5eb5c23b062f..8153251ea389 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -79,7 +79,6 @@ libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o -libnvdimm-y += dimm_devs.o libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c deleted file mode 100644 index 57bd27dedf1f..000000000000 --- a/tools/testing/nvdimm/dimm_devs.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright Intel Corp. 2018 */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/nd.h> -#include "pmem.h" -#include "pfn.h" -#include "nd.h" -#include "nd-core.h" - -ssize_t security_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvdimm *nvdimm = to_nvdimm(dev); - - /* - * For the test version we need to poll the "hardware" in order - * to get the updated status for unlock testing. - */ - nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); - - if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) - return sprintf(buf, "disabled\n"); - if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) - return sprintf(buf, "unlocked\n"); - if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags)) - return sprintf(buf, "locked\n"); - return -ENOTTY; -} diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f07aef7c592c..41b649452560 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -27,6 +27,7 @@ TARGETS += ftrace TARGETS += futex TARGETS += gpio TARGETS += intel_pstate +TARGETS += iommu TARGETS += ipc TARGETS += ir TARGETS += kcmp @@ -48,6 +49,7 @@ TARGETS += nci TARGETS += net TARGETS += net/af_unix TARGETS += net/forwarding +TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch TARGETS += netfilter @@ -74,6 +76,7 @@ TARGETS += sync TARGETS += syscall_user_dispatch TARGETS += sysctl TARGETS += tc-testing +TARGETS += tdx TARGETS += timens ifneq (1, $(quicktest)) TARGETS += timers diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore index 3bb7c41266a8..2b0d11797f25 100644 --- a/tools/testing/selftests/alsa/.gitignore +++ b/tools/testing/selftests/alsa/.gitignore @@ -1 +1,2 @@ mixer-test +pcm-test diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index fd8ddce2b1a6..a8c0383878d3 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -6,7 +6,20 @@ LDLIBS += $(shell pkg-config --libs alsa) ifeq ($(LDLIBS),) LDLIBS += -lasound endif +CFLAGS += -L$(OUTPUT) -Wl,-rpath=./ -TEST_GEN_PROGS := mixer-test +OVERRIDE_TARGETS = 1 + +TEST_GEN_PROGS := mixer-test pcm-test + +TEST_GEN_PROGS_EXTENDED := libatest.so + +TEST_FILES := conf.d include ../lib.mk + +$(OUTPUT)/libatest.so: conf.c alsa-local.h + $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ + +$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h + $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@ diff --git a/tools/testing/selftests/alsa/alsa-local.h b/tools/testing/selftests/alsa/alsa-local.h new file mode 100644 index 000000000000..65f197ea9773 --- /dev/null +++ b/tools/testing/selftests/alsa/alsa-local.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest configuration helpers for the hw specific configuration +// +// Original author: Jaroslav Kysela <perex@perex.cz> +// Copyright (c) 2022 Red Hat Inc. + +#ifndef __ALSA_LOCAL_H +#define __ALSA_LOCAL_H + +#include <alsa/asoundlib.h> + +snd_config_t *get_alsalib_config(void); + +void conf_load(void); +void conf_free(void); +snd_config_t *conf_by_card(int card); +snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2); +int conf_get_count(snd_config_t *root, const char *key1, const char *key2); +const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def); +long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def); +int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def); + +#endif /* __ALSA_LOCAL_H */ diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c new file mode 100644 index 000000000000..c7ffc8f04195 --- /dev/null +++ b/tools/testing/selftests/alsa/conf.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest configuration helpers for the hw specific configuration +// +// Original author: Jaroslav Kysela <perex@perex.cz> +// Copyright (c) 2022 Red Hat Inc. + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <assert.h> +#include <dirent.h> +#include <regex.h> +#include <sys/stat.h> + +#include "../kselftest.h" +#include "alsa-local.h" + +#define SYSFS_ROOT "/sys" + +struct card_data { + int card; + snd_config_t *config; + const char *filename; + struct card_data *next; +}; + +static struct card_data *conf_cards; + +static const char *alsa_config = +"ctl.hw {\n" +" @args [ CARD ]\n" +" @args.CARD.type string\n" +" type hw\n" +" card $CARD\n" +"}\n" +"pcm.hw {\n" +" @args [ CARD DEV SUBDEV ]\n" +" @args.CARD.type string\n" +" @args.DEV.type integer\n" +" @args.SUBDEV.type integer\n" +" type hw\n" +" card $CARD\n" +" device $DEV\n" +" subdevice $SUBDEV\n" +"}\n" +; + +#ifdef SND_LIB_VER +#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6) +#define LIB_HAS_LOAD_STRING +#endif +#endif + +#ifndef LIB_HAS_LOAD_STRING +static int snd_config_load_string(snd_config_t **config, const char *s, + size_t size) +{ + snd_input_t *input; + snd_config_t *dst; + int err; + + assert(config && s); + if (size == 0) + size = strlen(s); + err = snd_input_buffer_open(&input, s, size); + if (err < 0) + return err; + err = snd_config_top(&dst); + if (err < 0) { + snd_input_close(input); + return err; + } + err = snd_config_load(dst, input); + snd_input_close(input); + if (err < 0) { + snd_config_delete(dst); + return err; + } + *config = dst; + return 0; +} +#endif + +snd_config_t *get_alsalib_config(void) +{ + snd_config_t *config; + int err; + + err = snd_config_load_string(&config, alsa_config, strlen(alsa_config)); + if (err < 0) { + ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n", + snd_strerror(err)); + ksft_exit_fail(); + } + return config; +} + +static struct card_data *conf_data_by_card(int card, bool msg) +{ + struct card_data *conf; + + for (conf = conf_cards; conf; conf = conf->next) { + if (conf->card == card) { + if (msg) + ksft_print_msg("using hw card config %s for card %d\n", + conf->filename, card); + return conf; + } + } + return NULL; +} + +static int dump_config_tree(snd_config_t *top) +{ + snd_output_t *out; + int err; + + err = snd_output_stdio_attach(&out, stdout, 0); + if (err < 0) + ksft_exit_fail_msg("stdout attach\n"); + if (snd_config_save(top, out)) + ksft_exit_fail_msg("config save\n"); + snd_output_close(out); +} + +static snd_config_t *load(const char *filename) +{ + snd_config_t *dst; + snd_input_t *input; + int err; + + err = snd_input_stdio_open(&input, filename, "r"); + if (err < 0) + ksft_exit_fail_msg("Unable to parse filename %s\n", filename); + err = snd_config_top(&dst); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + err = snd_config_load(dst, input); + snd_input_close(input); + if (err < 0) + ksft_exit_fail_msg("Unable to parse filename %s\n", filename); + return dst; +} + +static char *sysfs_get(const char *sysfs_root, const char *id) +{ + char path[PATH_MAX], link[PATH_MAX + 1]; + struct stat sb; + ssize_t len; + char *e; + int fd; + + if (id[0] == '/') + id++; + snprintf(path, sizeof(path), "%s/%s", sysfs_root, id); + if (lstat(path, &sb) != 0) + return NULL; + if (S_ISLNK(sb.st_mode)) { + len = readlink(path, link, sizeof(link) - 1); + if (len <= 0) { + ksft_exit_fail_msg("sysfs: cannot read link '%s': %s\n", + path, strerror(errno)); + return NULL; + } + link[len] = '\0'; + e = strrchr(link, '/'); + if (e) + return strdup(e + 1); + return NULL; + } + if (S_ISDIR(sb.st_mode)) + return NULL; + if ((sb.st_mode & S_IRUSR) == 0) + return NULL; + + fd = open(path, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) + return NULL; + ksft_exit_fail_msg("sysfs: open failed for '%s': %s\n", + path, strerror(errno)); + } + len = read(fd, path, sizeof(path)-1); + close(fd); + if (len < 0) + ksft_exit_fail_msg("sysfs: unable to read value '%s': %s\n", + path, errno); + while (len > 0 && path[len-1] == '\n') + len--; + path[len] = '\0'; + e = strdup(path); + if (e == NULL) + ksft_exit_fail_msg("Out of memory\n"); + return e; +} + +static bool sysfs_match(const char *sysfs_root, snd_config_t *config) +{ + snd_config_t *node, *path_config, *regex_config; + snd_config_iterator_t i, next; + const char *path_string, *regex_string, *v; + regex_t re; + regmatch_t match[1]; + int iter = 0, ret; + + snd_config_for_each(i, next, config) { + node = snd_config_iterator_entry(i); + if (snd_config_search(node, "path", &path_config)) + ksft_exit_fail_msg("Missing path field in the sysfs block\n"); + if (snd_config_search(node, "regex", ®ex_config)) + ksft_exit_fail_msg("Missing regex field in the sysfs block\n"); + if (snd_config_get_string(path_config, &path_string)) + ksft_exit_fail_msg("Path field in the sysfs block is not a string\n"); + if (snd_config_get_string(regex_config, ®ex_string)) + ksft_exit_fail_msg("Regex field in the sysfs block is not a string\n"); + iter++; + v = sysfs_get(sysfs_root, path_string); + if (!v) + return false; + if (regcomp(&re, regex_string, REG_EXTENDED)) + ksft_exit_fail_msg("Wrong regex '%s'\n", regex_string); + ret = regexec(&re, v, 1, match, 0); + regfree(&re); + if (ret) + return false; + } + return iter > 0; +} + +static bool test_filename1(int card, const char *filename, const char *sysfs_card_root) +{ + struct card_data *data, *data2; + snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node; + snd_config_iterator_t i, next; + + config = load(filename); + if (snd_config_search(config, "sysfs", &sysfs_config) || + snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename); + if (snd_config_search(config, "card", &card_config) || + snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing global card block in filename %s\n", filename); + if (!sysfs_match(SYSFS_ROOT, sysfs_config)) + return false; + snd_config_for_each(i, next, card_config) { + node = snd_config_iterator_entry(i); + if (snd_config_search(node, "sysfs", &sysfs_card_config) || + snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename); + if (!sysfs_match(sysfs_card_root, sysfs_card_config)) + continue; + data = malloc(sizeof(*data)); + if (!data) + ksft_exit_fail_msg("Out of memory\n"); + data2 = conf_data_by_card(card, false); + if (data2) + ksft_exit_fail_msg("Duplicate card '%s' <-> '%s'\n", filename, data2->filename); + data->card = card; + data->filename = filename; + data->config = node; + data->next = conf_cards; + conf_cards = data; + return true; + } + return false; +} + +static bool test_filename(const char *filename) +{ + char fn[128]; + int card; + + for (card = 0; card < 32; card++) { + snprintf(fn, sizeof(fn), "%s/class/sound/card%d", SYSFS_ROOT, card); + if (access(fn, R_OK) == 0 && test_filename1(card, filename, fn)) + return true; + } + return false; +} + +static int filename_filter(const struct dirent *dirent) +{ + size_t flen; + + if (dirent == NULL) + return 0; + if (dirent->d_type == DT_DIR) + return 0; + flen = strlen(dirent->d_name); + if (flen <= 5) + return 0; + if (strncmp(&dirent->d_name[flen-5], ".conf", 5) == 0) + return 1; + return 0; +} + +void conf_load(void) +{ + const char *fn = "conf.d"; + struct dirent **namelist; + int n, j; + + n = scandir(fn, &namelist, filename_filter, alphasort); + if (n < 0) + ksft_exit_fail_msg("scandir: %s\n", strerror(errno)); + for (j = 0; j < n; j++) { + size_t sl = strlen(fn) + strlen(namelist[j]->d_name) + 2; + char *filename = malloc(sl); + if (filename == NULL) + ksft_exit_fail_msg("Out of memory\n"); + sprintf(filename, "%s/%s", fn, namelist[j]->d_name); + if (test_filename(filename)) + filename = NULL; + free(filename); + free(namelist[j]); + } + free(namelist); +} + +void conf_free(void) +{ + struct card_data *conf; + + while (conf_cards) { + conf = conf_cards; + conf_cards = conf->next; + snd_config_delete(conf->config); + } +} + +snd_config_t *conf_by_card(int card) +{ + struct card_data *conf; + + conf = conf_data_by_card(card, true); + if (conf) + return conf->config; + return NULL; +} + +static int conf_get_by_keys(snd_config_t *root, const char *key1, + const char *key2, snd_config_t **result) +{ + int ret; + + if (key1) { + ret = snd_config_search(root, key1, &root); + if (ret != -ENOENT && ret < 0) + return ret; + } + if (key2) + ret = snd_config_search(root, key2, &root); + if (ret >= 0) + *result = root; + return ret; +} + +snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2) +{ + int ret; + + if (!root) + return NULL; + ret = conf_get_by_keys(root, key1, key2, &root); + if (ret == -ENOENT) + return NULL; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + return root; +} + +int conf_get_count(snd_config_t *root, const char *key1, const char *key2) +{ + snd_config_t *cfg; + snd_config_iterator_t i, next; + int count, ret; + + if (!root) + return -1; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return -1; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + if (snd_config_get_type(cfg) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("key '%s'.'%s' is not a compound\n", key1, key2); + count = 0; + snd_config_for_each(i, next, cfg) + count++; + return count; +} + +const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def) +{ + snd_config_t *cfg; + const char *s; + int ret; + + if (!root) + return def; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return def; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + if (snd_config_get_string(cfg, &s)) + ksft_exit_fail_msg("key '%s'.'%s' is not a string\n", key1, key2); + return s; +} + +long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def) +{ + snd_config_t *cfg; + long l; + int ret; + + if (!root) + return def; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return def; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + if (snd_config_get_integer(cfg, &l)) + ksft_exit_fail_msg("key '%s'.'%s' is not an integer\n", key1, key2); + return l; +} + +int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def) +{ + snd_config_t *cfg; + long l; + int ret; + + if (!root) + return def; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return def; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + ret = snd_config_get_bool(cfg); + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' is not an bool\n", key1, key2); + return !!ret; +} diff --git a/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf new file mode 100644 index 000000000000..9eca985e0c08 --- /dev/null +++ b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf @@ -0,0 +1,79 @@ +# +# Example configuration for Lenovo ThinkPad P1 Gen2 +# + +# +# Use regex match for the string read from the given sysfs path +# +# The sysfs root directory (/sys) is hardwired in the test code +# (may be changed on demand). +# +# All strings must match. +# +sysfs [ + { + path "class/dmi/id/product_sku" + regex "LENOVO_MT_20QU_BU_Think_FM_ThinkPad P1 Gen 2" + } +] + +card.hda { + # + # Use regex match for the /sys/class/sound/card*/ tree (relative) + # + sysfs [ + { + path "device/subsystem_device" + regex "0x229e" + } + { + path "device/subsystem_vendor" + regex "0x17aa" + } + ] + + # + # PCM configuration + # + # pcm.0.0 - device 0 subdevice 0 + # + pcm.0.0 { + PLAYBACK { + # + # Uncomment to override values for specific tests + # + #test_name1 { + # access RW_INTERLEAVED + # format S16_LE + # rate 48000 + # channels 2 + # period_size 512 + # buffer_size 4096 + #} + #test_name2 { + # access RW_INTERLEAVED + # format S16_LE + # rate 48000 + # channels 2 + # period_size 24000 + # buffer_size 192000 + #} + } + CAPTURE { + # use default tests, check for the presence + } + } + # + # uncomment to force the missing device checks + # + #pcm.0.2 { + # PLAYBACK { + # # check for the presence + # } + #} + #pcm.0.3 { + # CAPTURE { + # # check for the presence + # } + #} +} diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index a38b89c28030..05f1749ae19d 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -26,6 +26,7 @@ #include <stdint.h> #include "../kselftest.h" +#include "alsa-local.h" #define TESTS_PER_CONTROL 7 @@ -50,56 +51,11 @@ struct ctl_data { struct ctl_data *next; }; -static const char *alsa_config = -"ctl.hw {\n" -" @args [ CARD ]\n" -" @args.CARD.type string\n" -" type hw\n" -" card $CARD\n" -"}\n" -; - int num_cards = 0; int num_controls = 0; struct card_data *card_list = NULL; struct ctl_data *ctl_list = NULL; -#ifdef SND_LIB_VER -#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6) -#define LIB_HAS_LOAD_STRING -#endif -#endif - -#ifndef LIB_HAS_LOAD_STRING -static int snd_config_load_string(snd_config_t **config, const char *s, - size_t size) -{ - snd_input_t *input; - snd_config_t *dst; - int err; - - assert(config && s); - if (size == 0) - size = strlen(s); - err = snd_input_buffer_open(&input, s, size); - if (err < 0) - return err; - err = snd_config_top(&dst); - if (err < 0) { - snd_input_close(input); - return err; - } - err = snd_config_load(dst, input); - snd_input_close(input); - if (err < 0) { - snd_config_delete(dst); - return err; - } - *config = dst; - return 0; -} -#endif - static void find_controls(void) { char name[32]; @@ -112,12 +68,7 @@ static void find_controls(void) if (snd_card_next(&card) < 0 || card < 0) return; - err = snd_config_load_string(&config, alsa_config, strlen(alsa_config)); - if (err < 0) { - ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n", - snd_strerror(err)); - ksft_exit_fail(); - } + config = get_alsalib_config(); while (card >= 0) { sprintf(name, "hw:%d", card); diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c new file mode 100644 index 000000000000..f293c7d81009 --- /dev/null +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest for the ALSA PCM API +// +// Original author: Jaroslav Kysela <perex@perex.cz> +// Copyright (c) 2022 Red Hat Inc. + +// This test will iterate over all cards detected in the system, exercising +// every PCM device it can find. This may conflict with other system +// software if there is audio activity so is best run on a system with a +// minimal active userspace. + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <assert.h> + +#include "../kselftest.h" +#include "alsa-local.h" + +typedef struct timespec timestamp_t; + +struct pcm_data { + snd_pcm_t *handle; + int card; + int device; + int subdevice; + snd_pcm_stream_t stream; + snd_config_t *pcm_config; + struct pcm_data *next; +}; + +int num_pcms = 0; +struct pcm_data *pcm_list = NULL; + +int num_missing = 0; +struct pcm_data *pcm_missing = NULL; + +struct time_test_def { + const char *cfg_prefix; + const char *format; + long rate; + long channels; + long period_size; + long buffer_size; +}; + +void timestamp_now(timestamp_t *tstamp) +{ + if (clock_gettime(CLOCK_MONOTONIC_RAW, tstamp)) + ksft_exit_fail_msg("clock_get_time\n"); +} + +long long timestamp_diff_ms(timestamp_t *tstamp) +{ + timestamp_t now, diff; + timestamp_now(&now); + if (tstamp->tv_nsec > now.tv_nsec) { + diff.tv_sec = now.tv_sec - tstamp->tv_sec - 1; + diff.tv_nsec = (now.tv_nsec + 1000000000L) - tstamp->tv_nsec; + } else { + diff.tv_sec = now.tv_sec - tstamp->tv_sec; + diff.tv_nsec = now.tv_nsec - tstamp->tv_nsec; + } + return (diff.tv_sec * 1000) + ((diff.tv_nsec + 500000L) / 1000000L); +} + +static long device_from_id(snd_config_t *node) +{ + const char *id; + char *end; + long v; + + if (snd_config_get_id(node, &id)) + ksft_exit_fail_msg("snd_config_get_id\n"); + errno = 0; + v = strtol(id, &end, 10); + if (errno || *end) + return -1; + return v; +} + +static void missing_device(int card, int device, int subdevice, snd_pcm_stream_t stream) +{ + struct pcm_data *pcm_data; + + for (pcm_data = pcm_list; pcm_data != NULL; pcm_data = pcm_data->next) { + if (pcm_data->card != card) + continue; + if (pcm_data->device != device) + continue; + if (pcm_data->subdevice != subdevice) + continue; + if (pcm_data->stream != stream) + continue; + return; + } + pcm_data = calloc(1, sizeof(*pcm_data)); + if (!pcm_data) + ksft_exit_fail_msg("Out of memory\n"); + pcm_data->card = card; + pcm_data->device = device; + pcm_data->subdevice = subdevice; + pcm_data->stream = stream; + pcm_data->next = pcm_missing; + pcm_missing = pcm_data; + num_missing++; +} + +static void missing_devices(int card, snd_config_t *card_config) +{ + snd_config_t *pcm_config, *node1, *node2; + snd_config_iterator_t i1, i2, next1, next2; + int device, subdevice; + + pcm_config = conf_get_subtree(card_config, "pcm", NULL); + if (!pcm_config) + return; + snd_config_for_each(i1, next1, pcm_config) { + node1 = snd_config_iterator_entry(i1); + device = device_from_id(node1); + if (device < 0) + continue; + if (snd_config_get_type(node1) != SND_CONFIG_TYPE_COMPOUND) + continue; + snd_config_for_each(i2, next2, node1) { + node2 = snd_config_iterator_entry(i2); + subdevice = device_from_id(node2); + if (subdevice < 0) + continue; + if (conf_get_subtree(node2, "PLAYBACK", NULL)) + missing_device(card, device, subdevice, SND_PCM_STREAM_PLAYBACK); + if (conf_get_subtree(node2, "CAPTURE", NULL)) + missing_device(card, device, subdevice, SND_PCM_STREAM_CAPTURE); + } + } +} + +static void find_pcms(void) +{ + char name[32], key[64]; + int card, dev, subdev, count, direction, err; + snd_pcm_stream_t stream; + struct pcm_data *pcm_data; + snd_ctl_t *handle; + snd_pcm_info_t *pcm_info; + snd_config_t *config, *card_config, *pcm_config; + + snd_pcm_info_alloca(&pcm_info); + + card = -1; + if (snd_card_next(&card) < 0 || card < 0) + return; + + config = get_alsalib_config(); + + while (card >= 0) { + sprintf(name, "hw:%d", card); + + err = snd_ctl_open_lconf(&handle, name, 0, config); + if (err < 0) { + ksft_print_msg("Failed to get hctl for card %d: %s\n", + card, snd_strerror(err)); + goto next_card; + } + + card_config = conf_by_card(card); + + dev = -1; + while (1) { + if (snd_ctl_pcm_next_device(handle, &dev) < 0) + ksft_exit_fail_msg("snd_ctl_pcm_next_device\n"); + if (dev < 0) + break; + + for (direction = 0; direction < 2; direction++) { + stream = direction ? SND_PCM_STREAM_CAPTURE : SND_PCM_STREAM_PLAYBACK; + sprintf(key, "pcm.%d.%s", dev, snd_pcm_stream_name(stream)); + pcm_config = conf_get_subtree(card_config, key, NULL); + if (conf_get_bool(card_config, key, "skip", false)) { + ksft_print_msg("skipping pcm %d.%d.%s\n", card, dev, snd_pcm_stream_name(stream)); + continue; + } + snd_pcm_info_set_device(pcm_info, dev); + snd_pcm_info_set_subdevice(pcm_info, 0); + snd_pcm_info_set_stream(pcm_info, stream); + err = snd_ctl_pcm_info(handle, pcm_info); + if (err == -ENOENT) + continue; + if (err < 0) + ksft_exit_fail_msg("snd_ctl_pcm_info: %d:%d:%d\n", + dev, 0, stream); + count = snd_pcm_info_get_subdevices_count(pcm_info); + for (subdev = 0; subdev < count; subdev++) { + sprintf(key, "pcm.%d.%d.%s", dev, subdev, snd_pcm_stream_name(stream)); + if (conf_get_bool(card_config, key, "skip", false)) { + ksft_print_msg("skipping pcm %d.%d.%d.%s\n", card, dev, + subdev, snd_pcm_stream_name(stream)); + continue; + } + pcm_data = calloc(1, sizeof(*pcm_data)); + if (!pcm_data) + ksft_exit_fail_msg("Out of memory\n"); + pcm_data->card = card; + pcm_data->device = dev; + pcm_data->subdevice = subdev; + pcm_data->stream = stream; + pcm_data->pcm_config = conf_get_subtree(card_config, key, NULL); + pcm_data->next = pcm_list; + pcm_list = pcm_data; + num_pcms++; + } + } + } + + /* check for missing devices */ + missing_devices(card, card_config); + + next_card: + snd_ctl_close(handle); + if (snd_card_next(&card) < 0) { + ksft_print_msg("snd_card_next"); + break; + } + } + + snd_config_delete(config); +} + +static void test_pcm_time1(struct pcm_data *data, + const struct time_test_def *test) +{ + char name[64], key[128], msg[256]; + const char *cs; + int i, err; + snd_pcm_t *handle = NULL; + snd_pcm_access_t access = SND_PCM_ACCESS_RW_INTERLEAVED; + snd_pcm_format_t format; + unsigned char *samples = NULL; + snd_pcm_sframes_t frames; + long long ms; + long rate, channels, period_size, buffer_size; + unsigned int rchannels; + unsigned int rrate; + snd_pcm_uframes_t rperiod_size, rbuffer_size, start_threshold; + timestamp_t tstamp; + bool pass = false, automatic = true; + snd_pcm_hw_params_t *hw_params; + snd_pcm_sw_params_t *sw_params; + bool skip = false; + + snd_pcm_hw_params_alloca(&hw_params); + snd_pcm_sw_params_alloca(&sw_params); + + cs = conf_get_string(data->pcm_config, test->cfg_prefix, "format", test->format); + format = snd_pcm_format_value(cs); + if (format == SND_PCM_FORMAT_UNKNOWN) + ksft_exit_fail_msg("Wrong format '%s'\n", cs); + rate = conf_get_long(data->pcm_config, test->cfg_prefix, "rate", test->rate); + channels = conf_get_long(data->pcm_config, test->cfg_prefix, "channels", test->channels); + period_size = conf_get_long(data->pcm_config, test->cfg_prefix, "period_size", test->period_size); + buffer_size = conf_get_long(data->pcm_config, test->cfg_prefix, "buffer_size", test->buffer_size); + + automatic = strcmp(test->format, snd_pcm_format_name(format)) == 0 && + test->rate == rate && + test->channels == channels && + test->period_size == period_size && + test->buffer_size == buffer_size; + + samples = malloc((rate * channels * snd_pcm_format_physical_width(format)) / 8); + if (!samples) + ksft_exit_fail_msg("Out of memory\n"); + snd_pcm_format_set_silence(format, samples, rate * channels); + + sprintf(name, "hw:%d,%d,%d", data->card, data->device, data->subdevice); + err = snd_pcm_open(&handle, name, data->stream, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "Failed to get pcm handle: %s", snd_strerror(err)); + goto __close; + } + + err = snd_pcm_hw_params_any(handle, hw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_any: %s", snd_strerror(err)); + goto __close; + } + err = snd_pcm_hw_params_set_rate_resample(handle, hw_params, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate_resample: %s", snd_strerror(err)); + goto __close; + } + err = snd_pcm_hw_params_set_access(handle, hw_params, access); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_access %s: %s", + snd_pcm_access_name(access), snd_strerror(err)); + goto __close; + } +__format: + err = snd_pcm_hw_params_set_format(handle, hw_params, format); + if (err < 0) { + if (automatic && format == SND_PCM_FORMAT_S16_LE) { + format = SND_PCM_FORMAT_S32_LE; + ksft_print_msg("%s.%d.%d.%d.%s.%s format S16_LE -> S32_LE\n", + test->cfg_prefix, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + snd_pcm_access_name(access)); + } + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_format %s: %s", + snd_pcm_format_name(format), snd_strerror(err)); + goto __close; + } + rchannels = channels; + err = snd_pcm_hw_params_set_channels_near(handle, hw_params, &rchannels); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_channels %ld: %s", channels, snd_strerror(err)); + goto __close; + } + if (rchannels != channels) { + snprintf(msg, sizeof(msg), "channels unsupported %ld != %ld", channels, rchannels); + skip = true; + goto __close; + } + rrate = rate; + err = snd_pcm_hw_params_set_rate_near(handle, hw_params, &rrate, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate %ld: %s", rate, snd_strerror(err)); + goto __close; + } + if (rrate != rate) { + snprintf(msg, sizeof(msg), "rate unsupported %ld != %ld", rate, rrate); + skip = true; + goto __close; + } + rperiod_size = period_size; + err = snd_pcm_hw_params_set_period_size_near(handle, hw_params, &rperiod_size, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_period_size %ld: %s", period_size, snd_strerror(err)); + goto __close; + } + rbuffer_size = buffer_size; + err = snd_pcm_hw_params_set_buffer_size_near(handle, hw_params, &rbuffer_size); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_buffer_size %ld: %s", buffer_size, snd_strerror(err)); + goto __close; + } + err = snd_pcm_hw_params(handle, hw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params: %s", snd_strerror(err)); + goto __close; + } + + err = snd_pcm_sw_params_current(handle, sw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params_current: %s", snd_strerror(err)); + goto __close; + } + if (data->stream == SND_PCM_STREAM_PLAYBACK) { + start_threshold = (rbuffer_size / rperiod_size) * rperiod_size; + } else { + start_threshold = rperiod_size; + } + err = snd_pcm_sw_params_set_start_threshold(handle, sw_params, start_threshold); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_start_threshold %ld: %s", (long)start_threshold, snd_strerror(err)); + goto __close; + } + err = snd_pcm_sw_params_set_avail_min(handle, sw_params, rperiod_size); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_avail_min %ld: %s", (long)rperiod_size, snd_strerror(err)); + goto __close; + } + err = snd_pcm_sw_params(handle, sw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params: %s", snd_strerror(err)); + goto __close; + } + + ksft_print_msg("%s.%d.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n", + test->cfg_prefix, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + snd_pcm_access_name(access), + snd_pcm_format_name(format), + (long)rate, (long)channels, + (long)rperiod_size, (long)rbuffer_size, + (long)start_threshold); + + timestamp_now(&tstamp); + for (i = 0; i < 4; i++) { + if (data->stream == SND_PCM_STREAM_PLAYBACK) { + frames = snd_pcm_writei(handle, samples, rate); + if (frames < 0) { + snprintf(msg, sizeof(msg), + "Write failed: expected %d, wrote %li", rate, frames); + goto __close; + } + if (frames < rate) { + snprintf(msg, sizeof(msg), + "expected %d, wrote %li", rate, frames); + goto __close; + } + } else { + frames = snd_pcm_readi(handle, samples, rate); + if (frames < 0) { + snprintf(msg, sizeof(msg), + "expected %d, wrote %li", rate, frames); + goto __close; + } + if (frames < rate) { + snprintf(msg, sizeof(msg), + "expected %d, wrote %li", rate, frames); + goto __close; + } + } + } + + snd_pcm_drain(handle); + ms = timestamp_diff_ms(&tstamp); + if (ms < 3900 || ms > 4100) { + snprintf(msg, sizeof(msg), "time mismatch: expected 4000ms got %lld", ms); + goto __close; + } + + msg[0] = '\0'; + pass = true; +__close: + if (!skip) { + ksft_test_result(pass, "%s.%d.%d.%d.%s%s%s\n", + test->cfg_prefix, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + msg[0] ? " " : "", msg); + } else { + ksft_test_result_skip("%s.%d.%d.%d.%s%s%s\n", + test->cfg_prefix, + data->card, data->device, + data->subdevice, + snd_pcm_stream_name(data->stream), + msg[0] ? " " : "", msg); + } + free(samples); + if (handle) + snd_pcm_close(handle); +} + +static const struct time_test_def time_tests[] = { + /* name format rate chan period buffer */ + { "8k.1.big", "S16_LE", 8000, 2, 8000, 32000 }, + { "8k.2.big", "S16_LE", 8000, 2, 8000, 32000 }, + { "44k1.2.big", "S16_LE", 44100, 2, 22050, 192000 }, + { "48k.2.small", "S16_LE", 48000, 2, 512, 4096 }, + { "48k.2.big", "S16_LE", 48000, 2, 24000, 192000 }, + { "48k.6.big", "S16_LE", 48000, 6, 48000, 576000 }, + { "96k.2.big", "S16_LE", 96000, 2, 48000, 192000 }, +}; + +int main(void) +{ + struct pcm_data *pcm; + int i; + + ksft_print_header(); + + conf_load(); + + find_pcms(); + + ksft_set_plan(num_missing + num_pcms * ARRAY_SIZE(time_tests)); + + for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) { + ksft_test_result(false, "test.missing.%d.%d.%d.%s\n", + pcm->card, pcm->device, pcm->subdevice, + snd_pcm_stream_name(pcm->stream)); + } + + for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) { + for (i = 0; i < ARRAY_SIZE(time_tests); i++) { + test_pcm_time1(pcm, &time_tests[i]); + } + } + + conf_free(); + + ksft_exit_pass(); + + return 0; +} diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile index 199867f44b32..5f195ee756d6 100644 --- a/tools/testing/selftests/amd-pstate/Makefile +++ b/tools/testing/selftests/amd-pstate/Makefile @@ -4,6 +4,15 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := amd-pstate-ut.sh +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq (x86,$(ARCH)) +TEST_GEN_FILES += ../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py +TEST_GEN_FILES += ../../../power/x86/intel_pstate_tracer/intel_pstate_tracer.py +endif + +TEST_PROGS := run.sh +TEST_FILES := basic.sh tbench.sh gitsource.sh include ../lib.mk diff --git a/tools/testing/selftests/amd-pstate/amd-pstate-ut.sh b/tools/testing/selftests/amd-pstate/amd-pstate-ut.sh deleted file mode 100755 index f8e82d91ffcf..000000000000 --- a/tools/testing/selftests/amd-pstate/amd-pstate-ut.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# amd-pstate-ut is a test module for testing the amd-pstate driver. -# It can only run on x86 architectures and current cpufreq driver -# must be amd-pstate. -# (1) It can help all users to verify their processor support -# (SBIOS/Firmware or Hardware). -# (2) Kernel can have a basic function test to avoid the kernel -# regression during the update. -# (3) We can introduce more functional or performance tests to align -# the result together, it will benefit power and performance scale optimization. - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -# amd-pstate-ut only run on x86/x86_64 AMD systems. -ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/') -VENDOR=$(cat /proc/cpuinfo | grep -m 1 'vendor_id' | awk '{print $NF}') - -if ! echo "$ARCH" | grep -q x86; then - echo "$0 # Skipped: Test can only run on x86 architectures." - exit $ksft_skip -fi - -if ! echo "$VENDOR" | grep -iq amd; then - echo "$0 # Skipped: Test can only run on AMD CPU." - echo "$0 # Current cpu vendor is $VENDOR." - exit $ksft_skip -fi - -scaling_driver=$(cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver) -if [ "$scaling_driver" != "amd-pstate" ]; then - echo "$0 # Skipped: Test can only run on amd-pstate driver." - echo "$0 # Please set X86_AMD_PSTATE enabled." - echo "$0 # Current cpufreq scaling drvier is $scaling_driver." - exit $ksft_skip -fi - -msg="Skip all tests:" -if [ ! -w /dev ]; then - echo $msg please run this as root >&2 - exit $ksft_skip -fi - -if ! /sbin/modprobe -q -n amd-pstate-ut; then - echo "amd-pstate-ut: module amd-pstate-ut is not found [SKIP]" - exit $ksft_skip -fi -if /sbin/modprobe -q amd-pstate-ut; then - /sbin/modprobe -q -r amd-pstate-ut - echo "amd-pstate-ut: ok" -else - echo "amd-pstate-ut: [FAIL]" - exit 1 -fi diff --git a/tools/testing/selftests/amd-pstate/basic.sh b/tools/testing/selftests/amd-pstate/basic.sh new file mode 100755 index 000000000000..e4c43193e4a3 --- /dev/null +++ b/tools/testing/selftests/amd-pstate/basic.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# amd-pstate-ut is a test module for testing the amd-pstate driver. +# It can only run on x86 architectures and current cpufreq driver +# must be amd-pstate. +# (1) It can help all users to verify their processor support +# (SBIOS/Firmware or Hardware). +# (2) Kernel can have a basic function test to avoid the kernel +# regression during the update. +# (3) We can introduce more functional or performance tests to align +# the result together, it will benefit power and performance scale optimization. + +# protect against multiple inclusion +if [ $FILE_BASIC ]; then + return 0 +else + FILE_BASIC=DONE +fi + +amd_pstate_basic() +{ + printf "\n---------------------------------------------\n" + printf "*** Running AMD P-state ut ***" + printf "\n---------------------------------------------\n" + + if ! /sbin/modprobe -q -n amd-pstate-ut; then + echo "amd-pstate-ut: module amd-pstate-ut is not found [SKIP]" + exit $ksft_skip + fi + if /sbin/modprobe -q amd-pstate-ut; then + /sbin/modprobe -q -r amd-pstate-ut + echo "amd-pstate-basic: ok" + else + echo "amd-pstate-basic: [FAIL]" + exit 1 + fi +} diff --git a/tools/testing/selftests/amd-pstate/gitsource.sh b/tools/testing/selftests/amd-pstate/gitsource.sh new file mode 100755 index 000000000000..dbc1fe45599d --- /dev/null +++ b/tools/testing/selftests/amd-pstate/gitsource.sh @@ -0,0 +1,354 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Testing and monitor the cpu desire performance, frequency, load, +# power consumption and throughput etc. when this script trigger +# gitsource test. +# 1) Download and tar gitsource codes. +# 2) Run gitsource benchmark on specific governors, ondemand or schedutil. +# 3) Run tbench benchmark comparative test on acpi-cpufreq kernel driver. +# 4) Get desire performance, frequency, load by perf. +# 5) Get power consumption and throughput by amd_pstate_trace.py. +# 6) Get run time by /usr/bin/time. +# 7) Analyse test results and save it in file selftest.gitsource.csv. +#8) Plot png images about time, energy and performance per watt for each test. + +# protect against multiple inclusion +if [ $FILE_GITSOURCE ]; then + return 0 +else + FILE_GITSOURCE=DONE +fi + +git_name="git-2.15.1" +git_tar="$git_name.tar.gz" +gitsource_url="https://github.com/git/git/archive/refs/tags/v2.15.1.tar.gz" +gitsource_governors=("ondemand" "schedutil") + +# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: time $7: energy, $8: PPW +store_csv_gitsource() +{ + echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_GIT.csv > /dev/null 2>&1 +} + +# clear some special lines +clear_csv_gitsource() +{ + if [ -f $OUTFILE_GIT.csv ]; then + sed -i '/Comprison(%)/d' $OUTFILE_GIT.csv + sed -i "/$(scaling_name)/d" $OUTFILE_GIT.csv + fi +} + +# find string $1 in file csv and get the number of lines +get_lines_csv_gitsource() +{ + if [ -f $OUTFILE_GIT.csv ]; then + return `grep -c "$1" $OUTFILE_GIT.csv` + else + return 0 + fi +} + +pre_clear_gitsource() +{ + post_clear_gitsource + rm -rf gitsource_*.png + clear_csv_gitsource +} + +post_clear_gitsource() +{ + rm -rf results/tracer-gitsource* + rm -rf $OUTFILE_GIT*.log + rm -rf $OUTFILE_GIT*.result +} + +install_gitsource() +{ + if [ ! -d $git_name ]; then + printf "Download gitsource, please wait a moment ...\n\n" + wget -O $git_tar $gitsource_url > /dev/null 2>&1 + + printf "Tar gitsource ...\n\n" + tar -xzf $git_tar + fi +} + +# $1: governor, $2: loop +run_gitsource() +{ + echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL" + ./amd_pstate_trace.py -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & + + printf "Make and test gitsource for $1 #$2 make_cpus: $MAKE_CPUS\n" + cd $git_name + perf stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o ../$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > ../$OUTFILE_GIT-perf-$1-$2.log 2>&1 + cd .. + + for job in `jobs -p` + do + echo "Waiting for job id $job" + wait $job + done +} + +# $1: governor, $2: loop +parse_gitsource() +{ + awk '{print $5}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-des-perf-$1-$2.log + avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-des-perf-$1-$2.log) + printf "Gitsource-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result + + awk '{print $7}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-freq-$1-$2.log + avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-freq-$1-$2.log) + printf "Gitsource-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result + + awk '{print $11}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-load-$1-$2.log + avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-load-$1-$2.log) + printf "Gitsource-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result + + grep user $OUTFILE_GIT.time-gitsource-$1-$2.log | awk '{print $1}' | sed -e 's/user//' > $OUTFILE_GIT-time-$1-$2.log + time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1-$2.log) + printf "Gitsource-$1-#$2 user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result + + grep Joules $OUTFILE_GIT-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_GIT-energy-$1-$2.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1-$2.log) + printf "Gitsource-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result + + # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t + # senconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # 1/t 1/t 1 + # ----- = ----- = --- + # P E/t E + # with unit given by 1 per joule. + ppw=`echo "scale=9;1/$en_sum" | bc | awk '{printf "%.9f", $0}'` + printf "Gitsource-$1-#$2 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result + printf "\n" | tee -a $OUTFILE_GIT.result + + driver_name=`echo $(scaling_name)` + store_csv_gitsource "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $time_sum $en_sum $ppw +} + +# $1: governor +loop_gitsource() +{ + printf "\nGitsource total test times is $LOOP_TIMES for $1\n\n" + for i in `seq 1 $LOOP_TIMES` + do + run_gitsource $1 $i + parse_gitsource $1 $i + done +} + +# $1: governor +gather_gitsource() +{ + printf "Gitsource test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_GIT.result + printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_GIT-des-perf-$1.log + avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-des-perf-$1.log) + printf "Gitsource-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_GIT-freq-$1.log + avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-freq-$1.log) + printf "Gitsource-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_GIT-load-$1.log + avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-load-$1.log) + printf "Gitsource-$1 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "user time(s):" | awk '{print $NF}' > $OUTFILE_GIT-time-$1.log + time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1.log) + printf "Gitsource-$1 total user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result + + avg_time=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-time-$1.log) + printf "Gitsource-$1 avg user times(s): $avg_time\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_GIT-energy-$1.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1.log) + printf "Gitsource-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result + + avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-energy-$1.log) + printf "Gitsource-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_GIT.result + + # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t + # senconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # 1/t 1/t 1 + # ----- = ----- = --- + # P E/t E + # with unit given by 1 per joule. + ppw=`echo "scale=9;1/$avg_en" | bc | awk '{printf "%.9f", $0}'` + printf "Gitsource-$1 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result + printf "\n" | tee -a $OUTFILE_GIT.result + + driver_name=`echo $(scaling_name)` + store_csv_gitsource "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_time $avg_en $ppw +} + +# $1: base scaling_driver $2: base governor $3: comparison scaling_driver $4: comparison governor +__calc_comp_gitsource() +{ + base=`grep "$1-$2" $OUTFILE_GIT.csv | grep "Average"` + comp=`grep "$3-$4" $OUTFILE_GIT.csv | grep "Average"` + + if [ -n "$base" -a -n "$comp" ]; then + printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result + printf "Gitsource comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_GIT.result + printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result + + # get the base values + des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//` + freq_base=`echo "$base" | awk '{print $4}' | sed s/,//` + load_base=`echo "$base" | awk '{print $5}' | sed s/,//` + time_base=`echo "$base" | awk '{print $6}' | sed s/,//` + energy_base=`echo "$base" | awk '{print $7}' | sed s/,//` + ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//` + + # get the comparison values + des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//` + freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//` + load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//` + time_comp=`echo "$comp" | awk '{print $6}' | sed s/,//` + energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//` + ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//` + + # compare the base and comp values + des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_GIT.result + + freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_GIT.result + + load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_GIT.result + + time_drop=`echo "scale=4;($time_comp-$time_base)*100/$time_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 time base: $time_base comprison: $time_comp percent: $time_drop\n" | tee -a $OUTFILE_GIT.result + + energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_GIT.result + + ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_GIT.result + printf "\n" | tee -a $OUTFILE_GIT.result + + store_csv_gitsource "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$time_drop" "$energy_drop" "$ppw_drop" + fi +} + +# calculate the comparison(%) +calc_comp_gitsource() +{ + # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil + __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[0]} ${gitsource_governors[1]} + + # amd-pstate-ondemand VS amd-pstate-schedutil + __calc_comp_gitsource ${all_scaling_names[1]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[1]} + + # acpi-cpufreq-ondemand VS amd-pstate-ondemand + __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[0]} + + # acpi-cpufreq-schedutil VS amd-pstate-schedutil + __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[1]} ${all_scaling_names[1]} ${gitsource_governors[1]} +} + +# $1: file_name, $2: title, $3: ylable, $4: column +plot_png_gitsource() +{ + # all_scaling_names[1] all_scaling_names[0] flag + # amd-pstate acpi-cpufreq + # N N 0 + # N Y 1 + # Y N 2 + # Y Y 3 + ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_GIT.csv` + if [ $ret -eq 0 ]; then + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv` + if [ $ret -eq 0 ]; then + flag=0 + else + flag=1 + fi + else + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv` + if [ $ret -eq 0 ]; then + flag=2 + else + flag=3 + fi + fi + + gnuplot << EOF + set term png + set output "$1" + + set title "$2" + set xlabel "Test Cycles (round)" + set ylabel "$3" + + set grid + set style data histogram + set style fill solid 0.5 border + set boxwidth 0.8 + + if ($flag == 1) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}" + } else { + if ($flag == 2) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}" + } else { + if ($flag == 3 ) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}" + } + } + } + quit +EOF +} + +amd_pstate_gitsource() +{ + printf "\n---------------------------------------------\n" + printf "*** Running gitsource ***" + printf "\n---------------------------------------------\n" + + pre_clear_gitsource + + install_gitsource + + get_lines_csv_gitsource "Governor" + if [ $? -eq 0 ]; then + # add titles and unit for csv file + store_csv_gitsource "Governor" "Round" "Des-perf" "Freq" "Load" "Time" "Energy" "Performance Per Watt" + store_csv_gitsource "Unit" "" "" "GHz" "" "s" "J" "1/J" + fi + + backup_governor + for governor in ${gitsource_governors[*]} ; do + printf "\nSpecified governor is $governor\n\n" + switch_governor $governor + loop_gitsource $governor + gather_gitsource $governor + done + restore_governor + + plot_png_gitsource "gitsource_time.png" "Gitsource Benchmark Time" "Time (s)" 6 + plot_png_gitsource "gitsource_energy.png" "Gitsource Benchmark Energy" "Energy (J)" 7 + plot_png_gitsource "gitsource_ppw.png" "Gitsource Benchmark Performance Per Watt" "Performance Per Watt (1/J)" 8 + + calc_comp_gitsource + + post_clear_gitsource +} diff --git a/tools/testing/selftests/amd-pstate/run.sh b/tools/testing/selftests/amd-pstate/run.sh new file mode 100755 index 000000000000..57cad57e59c0 --- /dev/null +++ b/tools/testing/selftests/amd-pstate/run.sh @@ -0,0 +1,387 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# protect against multiple inclusion +if [ $FILE_MAIN ]; then + return 0 +else + FILE_MAIN=DONE +fi + +source basic.sh +source tbench.sh +source gitsource.sh + +# amd-pstate-ut only run on x86/x86_64 AMD systems. +ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/') +VENDOR=$(cat /proc/cpuinfo | grep -m 1 'vendor_id' | awk '{print $NF}') + +msg="Skip all tests:" +FUNC=all +OUTFILE=selftest +OUTFILE_TBENCH="$OUTFILE.tbench" +OUTFILE_GIT="$OUTFILE.gitsource" + +SYSFS= +CPUROOT= +CPUFREQROOT= +MAKE_CPUS= + +TIME_LIMIT=100 +PROCESS_NUM=128 +LOOP_TIMES=3 +TRACER_INTERVAL=10 +CURRENT_TEST=amd-pstate +COMPARATIVE_TEST= + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +all_scaling_names=("acpi-cpufreq" "amd-pstate") + +# Get current cpufreq scaling driver name +scaling_name() +{ + if [ "$COMPARATIVE_TEST" = "" ]; then + echo "$CURRENT_TEST" + else + echo "$COMPARATIVE_TEST" + fi +} + +# Counts CPUs with cpufreq directories +count_cpus() +{ + count=0; + + for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do + if [ -d $CPUROOT/$cpu/cpufreq ]; then + let count=count+1; + fi + done + + echo $count; +} + +# $1: policy +find_current_governor() +{ + cat $CPUFREQROOT/$1/scaling_governor +} + +backup_governor() +{ + policies=$(ls $CPUFREQROOT| grep "policy[0-9].*") + for policy in $policies; do + cur_gov=$(find_current_governor $policy) + echo "$policy $cur_gov" >> $OUTFILE.backup_governor.log + done + + printf "Governor $cur_gov backup done.\n" +} + +restore_governor() +{ + i=0; + + policies=$(awk '{print $1}' $OUTFILE.backup_governor.log) + for policy in $policies; do + let i++; + governor=$(sed -n ''$i'p' $OUTFILE.backup_governor.log | awk '{print $2}') + + # switch governor + echo $governor > $CPUFREQROOT/$policy/scaling_governor + done + + printf "Governor restored to $governor.\n" +} + +# $1: governor +switch_governor() +{ + policies=$(ls $CPUFREQROOT| grep "policy[0-9].*") + for policy in $policies; do + filepath=$CPUFREQROOT/$policy/scaling_available_governors + + # Exit if cpu isn't managed by cpufreq core + if [ ! -f $filepath ]; then + return; + fi + + echo $1 > $CPUFREQROOT/$policy/scaling_governor + done + + printf "Switched governor to $1.\n" +} + +# All amd-pstate tests +amd_pstate_all() +{ + printf "\n=============================================\n" + printf "***** Running AMD P-state Sanity Tests *****\n" + printf "=============================================\n\n" + + count=$(count_cpus) + if [ $count = 0 ]; then + printf "No cpu is managed by cpufreq core, exiting\n" + exit; + else + printf "AMD P-state manages: $count CPUs\n" + fi + + # unit test for amd-pstate kernel driver + amd_pstate_basic + + # tbench + amd_pstate_tbench + + # gitsource + amd_pstate_gitsource +} + +help() +{ + printf "Usage: $0 [OPTION...] + [-h <help>] + [-o <output-file-for-dump>] + [-c <all: All testing, + basic: Basic testing, + tbench: Tbench testing, + gitsource: Gitsource testing.>] + [-t <tbench time limit>] + [-p <tbench process number>] + [-l <loop times for tbench>] + [-i <amd tracer interval>] + [-m <comparative test: acpi-cpufreq>] + \n" + exit 2 +} + +parse_arguments() +{ + while getopts ho:c:t:p:l:i:m: arg + do + case $arg in + h) # --help + help + ;; + + c) # --func_type (Function to perform: basic, tbench, gitsource (default: all)) + FUNC=$OPTARG + ;; + + o) # --output-file (Output file to store dumps) + OUTFILE=$OPTARG + ;; + + t) # --tbench-time-limit + TIME_LIMIT=$OPTARG + ;; + + p) # --tbench-process-number + PROCESS_NUM=$OPTARG + ;; + + l) # --tbench/gitsource-loop-times + LOOP_TIMES=$OPTARG + ;; + + i) # --amd-tracer-interval + TRACER_INTERVAL=$OPTARG + ;; + + m) # --comparative-test + COMPARATIVE_TEST=$OPTARG + ;; + + *) + help + ;; + esac + done +} + +command_perf() +{ + if ! command -v perf > /dev/null; then + echo $msg please install perf. >&2 + exit $ksft_skip + fi +} + +command_tbench() +{ + if ! command -v tbench > /dev/null; then + if apt policy dbench > /dev/null 2>&1; then + echo $msg apt install dbench >&2 + exit $ksft_skip + elif yum list available | grep dbench > /dev/null 2>&1; then + echo $msg yum install dbench >&2 + exit $ksft_skip + fi + fi + + if ! command -v tbench > /dev/null; then + echo $msg please install tbench. >&2 + exit $ksft_skip + fi +} + +prerequisite() +{ + if ! echo "$ARCH" | grep -q x86; then + echo "$0 # Skipped: Test can only run on x86 architectures." + exit $ksft_skip + fi + + if ! echo "$VENDOR" | grep -iq amd; then + echo "$0 # Skipped: Test can only run on AMD CPU." + echo "$0 # Current cpu vendor is $VENDOR." + exit $ksft_skip + fi + + scaling_driver=$(cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver) + if [ "$COMPARATIVE_TEST" = "" ]; then + if [ "$scaling_driver" != "$CURRENT_TEST" ]; then + echo "$0 # Skipped: Test can only run on $CURRENT_TEST driver or run comparative test." + echo "$0 # Please set X86_AMD_PSTATE enabled or run comparative test." + echo "$0 # Current cpufreq scaling drvier is $scaling_driver." + exit $ksft_skip + fi + else + case "$FUNC" in + "tbench" | "gitsource") + if [ "$scaling_driver" != "$COMPARATIVE_TEST" ]; then + echo "$0 # Skipped: Comparison test can only run on $COMPARISON_TEST driver." + echo "$0 # Current cpufreq scaling drvier is $scaling_driver." + exit $ksft_skip + fi + ;; + + *) + echo "$0 # Skipped: Comparison test are only for tbench or gitsource." + echo "$0 # Current comparative test is for $FUNC." + exit $ksft_skip + ;; + esac + fi + + if [ ! -w /dev ]; then + echo $msg please run this as root >&2 + exit $ksft_skip + fi + + case "$FUNC" in + "all") + command_perf + command_tbench + ;; + + "tbench") + command_perf + command_tbench + ;; + + "gitsource") + command_perf + ;; + esac + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 2 + fi + + CPUROOT=$SYSFS/devices/system/cpu + CPUFREQROOT="$CPUROOT/cpufreq" + + if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then + echo $msg cpus not available in sysfs >&2 + exit 2 + fi + + if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then + echo $msg cpufreq directory not available in sysfs >&2 + exit 2 + fi +} + +do_test() +{ + # Check if CPUs are managed by cpufreq or not + count=$(count_cpus) + MAKE_CPUS=$((count*2)) + + if [ $count = 0 ]; then + echo "No cpu is managed by cpufreq core, exiting" + exit 2; + fi + + case "$FUNC" in + "all") + amd_pstate_all + ;; + + "basic") + amd_pstate_basic + ;; + + "tbench") + amd_pstate_tbench + ;; + + "gitsource") + amd_pstate_gitsource + ;; + + *) + echo "Invalid [-f] function type" + help + ;; + esac +} + +# clear dumps +pre_clear_dumps() +{ + case "$FUNC" in + "all") + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log + rm -rf *.png + ;; + + "tbench") + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log + rm -rf tbench_*.png + ;; + + "gitsource") + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log + rm -rf gitsource_*.png + ;; + + *) + ;; + esac +} + +post_clear_dumps() +{ + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log +} + +# Parse arguments +parse_arguments $@ + +# Make sure all requirements are met +prerequisite + +# Run requested functions +pre_clear_dumps +do_test | tee -a $OUTFILE.log +post_clear_dumps diff --git a/tools/testing/selftests/amd-pstate/tbench.sh b/tools/testing/selftests/amd-pstate/tbench.sh new file mode 100755 index 000000000000..49c9850341f6 --- /dev/null +++ b/tools/testing/selftests/amd-pstate/tbench.sh @@ -0,0 +1,339 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Testing and monitor the cpu desire performance, frequency, load, +# power consumption and throughput etc.when this script trigger tbench +# test cases. +# 1) Run tbench benchmark on specific governors, ondemand or schedutil. +# 2) Run tbench benchmark comparative test on acpi-cpufreq kernel driver. +# 3) Get desire performance, frequency, load by perf. +# 4) Get power consumption and throughput by amd_pstate_trace.py. +# 5) Analyse test results and save it in file selftest.tbench.csv. +# 6) Plot png images about performance, energy and performance per watt for each test. + +# protect against multiple inclusion +if [ $FILE_TBENCH ]; then + return 0 +else + FILE_TBENCH=DONE +fi + +tbench_governors=("ondemand" "schedutil") + +# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: performance, $7: energy, $8: performance per watt +store_csv_tbench() +{ + echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_TBENCH.csv > /dev/null 2>&1 +} + +# clear some special lines +clear_csv_tbench() +{ + if [ -f $OUTFILE_TBENCH.csv ]; then + sed -i '/Comprison(%)/d' $OUTFILE_TBENCH.csv + sed -i "/$(scaling_name)/d" $OUTFILE_TBENCH.csv + fi +} + +# find string $1 in file csv and get the number of lines +get_lines_csv_tbench() +{ + if [ -f $OUTFILE_TBENCH.csv ]; then + return `grep -c "$1" $OUTFILE_TBENCH.csv` + else + return 0 + fi +} + +pre_clear_tbench() +{ + post_clear_tbench + rm -rf tbench_*.png + clear_csv_tbench +} + +post_clear_tbench() +{ + rm -rf results/tracer-tbench* + rm -rf $OUTFILE_TBENCH*.log + rm -rf $OUTFILE_TBENCH*.result + +} + +# $1: governor, $2: loop +run_tbench() +{ + echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL" + ./amd_pstate_trace.py -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & + + printf "Test tbench for $1 #$2 time_limit: $TIME_LIMIT procs_num: $PROCESS_NUM\n" + tbench_srv > /dev/null 2>&1 & + perf stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1 + + pid=`pidof tbench_srv` + kill $pid + + for job in `jobs -p` + do + echo "Waiting for job id $job" + wait $job + done +} + +# $1: governor, $2: loop +parse_tbench() +{ + awk '{print $5}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-des-perf-$1-$2.log + avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-des-perf-$1-$2.log) + printf "Tbench-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result + + awk '{print $7}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-freq-$1-$2.log + avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-freq-$1-$2.log) + printf "Tbench-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result + + awk '{print $11}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-load-$1-$2.log + avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-load-$1-$2.log) + printf "Tbench-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result + + grep Throughput $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $2}' > $OUTFILE_TBENCH-throughput-$1-$2.log + tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1-$2.log) + printf "Tbench-$1-#$2 throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result + + grep Joules $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_TBENCH-energy-$1-$2.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1-$2.log) + printf "Tbench-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result + + # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds. + # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # T/t T/t T + # --- = --- = --- + # P E/t E + # with unit given by MB per joule. + ppw=`echo "scale=4;($TIME_LIMIT-1)*$tp_sum/$en_sum" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1-#$2 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result + printf "\n" | tee -a $OUTFILE_TBENCH.result + + driver_name=`echo $(scaling_name)` + store_csv_tbench "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $tp_sum $en_sum $ppw +} + +# $1: governor +loop_tbench() +{ + printf "\nTbench total test times is $LOOP_TIMES for $1\n\n" + for i in `seq 1 $LOOP_TIMES` + do + run_tbench $1 $i + parse_tbench $1 $i + done +} + +# $1: governor +gather_tbench() +{ + printf "Tbench test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_TBENCH.result + printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_TBENCH-des-perf-$1.log + avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-des-perf-$1.log) + printf "Tbench-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_TBENCH-freq-$1.log + avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-freq-$1.log) + printf "Tbench-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_TBENCH-load-$1.log + avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-load-$1.log) + printf "Tbench-$1 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "throughput(MB/s):" | awk '{print $NF}' > $OUTFILE_TBENCH-throughput-$1.log + tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1.log) + printf "Tbench-$1 total throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result + + avg_tp=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-throughput-$1.log) + printf "Tbench-$1 avg throughput(MB/s): $avg_tp\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_TBENCH-energy-$1.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1.log) + printf "Tbench-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result + + avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-energy-$1.log) + printf "Tbench-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_TBENCH.result + + # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds. + # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # T/t T/t T + # --- = --- = --- + # P E/t E + # with unit given by MB per joule. + ppw=`echo "scale=4;($TIME_LIMIT-1)*$avg_tp/$avg_en" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result + printf "\n" | tee -a $OUTFILE_TBENCH.result + + driver_name=`echo $(scaling_name)` + store_csv_tbench "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_tp $avg_en $ppw +} + +# $1: base scaling_driver $2: base governor $3: comparative scaling_driver $4: comparative governor +__calc_comp_tbench() +{ + base=`grep "$1-$2" $OUTFILE_TBENCH.csv | grep "Average"` + comp=`grep "$3-$4" $OUTFILE_TBENCH.csv | grep "Average"` + + if [ -n "$base" -a -n "$comp" ]; then + printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result + printf "Tbench comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_TBENCH.result + printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result + + # get the base values + des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//` + freq_base=`echo "$base" | awk '{print $4}' | sed s/,//` + load_base=`echo "$base" | awk '{print $5}' | sed s/,//` + perf_base=`echo "$base" | awk '{print $6}' | sed s/,//` + energy_base=`echo "$base" | awk '{print $7}' | sed s/,//` + ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//` + + # get the comparative values + des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//` + freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//` + load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//` + perf_comp=`echo "$comp" | awk '{print $6}' | sed s/,//` + energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//` + ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//` + + # compare the base and comp values + des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_TBENCH.result + + freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_TBENCH.result + + load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_TBENCH.result + + perf_drop=`echo "scale=4;($perf_comp-$perf_base)*100/$perf_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 perf base: $perf_base comprison: $perf_comp percent: $perf_drop\n" | tee -a $OUTFILE_TBENCH.result + + energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_TBENCH.result + + ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_TBENCH.result + printf "\n" | tee -a $OUTFILE_TBENCH.result + + store_csv_tbench "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$perf_drop" "$energy_drop" "$ppw_drop" + fi +} + +# calculate the comparison(%) +calc_comp_tbench() +{ + # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil + __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[0]} ${tbench_governors[1]} + + # amd-pstate-ondemand VS amd-pstate-schedutil + __calc_comp_tbench ${all_scaling_names[1]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[1]} + + # acpi-cpufreq-ondemand VS amd-pstate-ondemand + __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[0]} + + # acpi-cpufreq-schedutil VS amd-pstate-schedutil + __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[1]} ${all_scaling_names[1]} ${tbench_governors[1]} +} + +# $1: file_name, $2: title, $3: ylable, $4: column +plot_png_tbench() +{ + # all_scaling_names[1] all_scaling_names[0] flag + # amd-pstate acpi-cpufreq + # N N 0 + # N Y 1 + # Y N 2 + # Y Y 3 + ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_TBENCH.csv` + if [ $ret -eq 0 ]; then + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv` + if [ $ret -eq 0 ]; then + flag=0 + else + flag=1 + fi + else + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv` + if [ $ret -eq 0 ]; then + flag=2 + else + flag=3 + fi + fi + + gnuplot << EOF + set term png + set output "$1" + + set title "$2" + set xlabel "Test Cycles (round)" + set ylabel "$3" + + set grid + set style data histogram + set style fill solid 0.5 border + set boxwidth 0.8 + + if ($flag == 1) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}" + } else { + if ($flag == 2) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}" + } else { + if ($flag == 3 ) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}" + } + } + } + quit +EOF +} + +amd_pstate_tbench() +{ + printf "\n---------------------------------------------\n" + printf "*** Running tbench ***" + printf "\n---------------------------------------------\n" + + pre_clear_tbench + + get_lines_csv_tbench "Governor" + if [ $? -eq 0 ]; then + # add titles and unit for csv file + store_csv_tbench "Governor" "Round" "Des-perf" "Freq" "Load" "Performance" "Energy" "Performance Per Watt" + store_csv_tbench "Unit" "" "" "GHz" "" "MB/s" "J" "MB/J" + fi + + backup_governor + for governor in ${tbench_governors[*]} ; do + printf "\nSpecified governor is $governor\n\n" + switch_governor $governor + loop_tbench $governor + gather_tbench $governor + done + restore_governor + + plot_png_tbench "tbench_perfromance.png" "Tbench Benchmark Performance" "Performance" 6 + plot_png_tbench "tbench_energy.png" "Tbench Benchmark Energy" "Energy (J)" 7 + plot_png_tbench "tbench_ppw.png" "Tbench Benchmark Performance Per Watt" "Performance Per Watt (MB/J)" 8 + + calc_comp_tbench + + post_clear_tbench +} diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 9f1a7b5c6193..9f255bc5f31c 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -33,6 +33,12 @@ */ typedef void (*sigill_fn)(void); +static void cssc_sigill(void) +{ + /* CNT x0, x0 */ + asm volatile(".inst 0xdac01c00" : : : "x0"); +} + static void rng_sigill(void) { asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); @@ -56,6 +62,12 @@ static void sve2_sigill(void) asm volatile(".inst 0x4408A000" : : : "z0"); } +static void sve2p1_sigill(void) +{ + /* BFADD Z0.H, Z0.H, Z0.H */ + asm volatile(".inst 0x65000000" : : : "z0"); +} + static void sveaes_sigill(void) { /* AESD z0.b, z0.b, z0.b */ @@ -119,6 +131,13 @@ static const struct hwcap_data { bool sigill_reliable; } hwcaps[] = { { + .name = "CSSC", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_CSSC, + .cpuinfo = "cssc", + .sigill_fn = cssc_sigill, + }, + { .name = "RNG", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_RNG, @@ -126,6 +145,12 @@ static const struct hwcap_data { .sigill_fn = rng_sigill, }, { + .name = "RPRFM", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_RPRFM, + .cpuinfo = "rprfm", + }, + { .name = "SME", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SME, @@ -149,6 +174,13 @@ static const struct hwcap_data { .sigill_fn = sve2_sigill, }, { + .name = "SVE 2.1", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVE2P1, + .cpuinfo = "sve2p1", + .sigill_fn = sve2p1_sigill, + }, + { .name = "SVE AES", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SVEAES, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index b523c21c2278..acd5e9f3bc0b 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -153,7 +153,7 @@ do_syscall: // Only set a non-zero FFR, test patterns must be zero since the // syscall should clear it - this lets us handle FA64. ldr x2, =ffr_in - ldr p0, [x2, #0] + ldr p0, [x2] ldr x2, [x2, #0] cbz x2, 2f wrffr p0.b @@ -298,7 +298,7 @@ do_syscall: cbz x2, 1f ldr x2, =ffr_out rdffr p0.b - str p0, [x2, #0] + str p0, [x2] 1: // Restore callee saved registers x19-x30 diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index 4e62a9199f97..f8b2f41aac36 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -39,10 +39,12 @@ struct child_data { static int epoll_fd; static struct child_data *children; +static struct epoll_event *evs; +static int tests; static int num_children; static bool terminate; -static void drain_output(bool flush); +static int startup_pipe[2]; static int num_processors(void) { @@ -82,12 +84,36 @@ static void child_start(struct child_data *child, const char *program) } /* + * Duplicate the read side of the startup pipe to + * FD 3 so we can close everything else. + */ + ret = dup2(startup_pipe[0], 3); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* * Very dumb mechanism to clean open FDs other than * stdio. We don't want O_CLOEXEC for the pipes... */ - for (i = 3; i < 8192; i++) + for (i = 4; i < 8192; i++) close(i); + /* + * Read from the startup pipe, there should be no data + * and we should block until it is closed. We just + * carry on on error since this isn't super critical. + */ + ret = read(3, &i, sizeof(i)); + if (ret < 0) + fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); + if (ret > 0) + fprintf(stderr, "%d bytes of data on startup pipe\n", + ret); + close(3); + ret = execl(program, program, NULL); fprintf(stderr, "execl(%s) failed: %d (%s)\n", program, errno, strerror(errno)); @@ -112,12 +138,6 @@ static void child_start(struct child_data *child, const char *program) ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", child->name, strerror(errno), errno); } - - /* - * Keep output flowing during child startup so logs - * are more timely, can help debugging. - */ - drain_output(false); } } @@ -290,12 +310,12 @@ static void start_fpsimd(struct child_data *child, int cpu, int copy) { int ret; - child_start(child, "./fpsimd-test"); - ret = asprintf(&child->name, "FPSIMD-%d-%d", cpu, copy); if (ret == -1) ksft_exit_fail_msg("asprintf() failed\n"); + child_start(child, "./fpsimd-test"); + ksft_print_msg("Started %s\n", child->name); } @@ -307,12 +327,12 @@ static void start_sve(struct child_data *child, int vl, int cpu) if (ret < 0) ksft_exit_fail_msg("Failed to set SVE VL %d\n", vl); - child_start(child, "./sve-test"); - ret = asprintf(&child->name, "SVE-VL-%d-%d", vl, cpu); if (ret == -1) ksft_exit_fail_msg("asprintf() failed\n"); + child_start(child, "./sve-test"); + ksft_print_msg("Started %s\n", child->name); } @@ -320,16 +340,16 @@ static void start_ssve(struct child_data *child, int vl, int cpu) { int ret; + ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT); if (ret < 0) ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); child_start(child, "./ssve-test"); - ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu); - if (ret == -1) - ksft_exit_fail_msg("asprintf() failed\n"); - ksft_print_msg("Started %s\n", child->name); } @@ -341,12 +361,12 @@ static void start_za(struct child_data *child, int vl, int cpu) if (ret < 0) ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); - child_start(child, "./za-test"); - ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu); if (ret == -1) ksft_exit_fail_msg("asprintf() failed\n"); + child_start(child, "./za-test"); + ksft_print_msg("Started %s\n", child->name); } @@ -375,11 +395,11 @@ static void probe_vls(int vls[], int *vl_count, int set_vl) /* Handle any pending output without blocking */ static void drain_output(bool flush) { - struct epoll_event ev; int ret = 1; + int i; while (ret > 0) { - ret = epoll_wait(epoll_fd, &ev, 1, 0); + ret = epoll_wait(epoll_fd, evs, tests, 0); if (ret < 0) { if (errno == EINTR) continue; @@ -387,8 +407,8 @@ static void drain_output(bool flush) strerror(errno), errno); } - if (ret == 1) - child_output(ev.data.ptr, ev.events, flush); + for (i = 0; i < ret; i++) + child_output(evs[i].data.ptr, evs[i].events, flush); } } @@ -401,10 +421,11 @@ int main(int argc, char **argv) { int ret; int timeout = 10; - int cpus, tests, i, j, c; + int cpus, i, j, c; int sve_vl_count, sme_vl_count, fpsimd_per_cpu; + bool all_children_started = false; + int seen_children; int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; - struct epoll_event ev; struct sigaction sa; while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { @@ -465,6 +486,12 @@ int main(int argc, char **argv) strerror(errno), ret); epoll_fd = ret; + /* Create a pipe which children will block on before execing */ + ret = pipe(startup_pipe); + if (ret != 0) + ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n", + strerror(errno), errno); + /* Get signal handers ready before we start any children */ memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = handle_exit_signal; @@ -484,6 +511,11 @@ int main(int argc, char **argv) ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n", strerror(errno), errno); + evs = calloc(tests, sizeof(*evs)); + if (!evs) + ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + tests); + for (i = 0; i < cpus; i++) { for (j = 0; j < fpsimd_per_cpu; j++) start_fpsimd(&children[num_children++], i, j); @@ -497,6 +529,13 @@ int main(int argc, char **argv) } } + /* + * All children started, close the startup pipe and let them + * run. + */ + close(startup_pipe[0]); + close(startup_pipe[1]); + for (;;) { /* Did we get a signal asking us to exit? */ if (terminate) @@ -510,7 +549,7 @@ int main(int argc, char **argv) * useful in emulation where we will both be slow and * likely to have a large set of VLs. */ - ret = epoll_wait(epoll_fd, &ev, 1, 1000); + ret = epoll_wait(epoll_fd, evs, tests, 1000); if (ret < 0) { if (errno == EINTR) continue; @@ -519,13 +558,40 @@ int main(int argc, char **argv) } /* Output? */ - if (ret == 1) { - child_output(ev.data.ptr, ev.events, false); + if (ret > 0) { + for (i = 0; i < ret; i++) { + child_output(evs[i].data.ptr, evs[i].events, + false); + } continue; } /* Otherwise epoll_wait() timed out */ + /* + * If the child processes have not produced output they + * aren't actually running the tests yet . + */ + if (!all_children_started) { + seen_children = 0; + + for (i = 0; i < num_children; i++) + if (children[i].output_seen || + children[i].exited) + seen_children++; + + if (seen_children != num_children) { + ksft_print_msg("Waiting for %d children\n", + num_children - seen_children); + continue; + } + + all_children_started = true; + } + + ksft_print_msg("Sending signals, timeout remaining: %d\n", + timeout); + for (i = 0; i < num_children; i++) child_tickle(&children[i]); diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 75fc482d63b6..1dbbbd47dd50 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -32,7 +32,7 @@ static int check_buffer_by_byte(int mem_type, int mode) bool err; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true); @@ -69,7 +69,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, char *und_ptr = NULL; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, underflow_range, 0); @@ -165,7 +165,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, char *over_ptr = NULL; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, 0, overflow_range); @@ -338,7 +338,7 @@ static int check_buffer_by_block(int mem_type, int mode) int i, item, result = KSFT_PASS; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); cur_mte_cxt.fault_valid = false; for (i = 0; i < item; i++) { result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]); @@ -366,7 +366,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping) { char *ptr; int run, fd; - int total = sizeof(sizes)/sizeof(int); + int total = ARRAY_SIZE(sizes); mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); for (run = 0; run < total; run++) { @@ -404,7 +404,7 @@ int main(int argc, char *argv[]) { int err; size_t page_size = getpagesize(); - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); sizes[item - 3] = page_size - 1; sizes[item - 2] = page_size; diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index a04b12c21ac9..17694caaff53 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -61,9 +61,8 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i { char *ptr, *map_ptr; int run, result, map_size; - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); - item = sizeof(sizes)/sizeof(int); mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); for (run = 0; run < item; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; @@ -93,7 +92,7 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta { char *ptr, *map_ptr; int run, fd, map_size; - int total = sizeof(sizes)/sizeof(int); + int total = ARRAY_SIZE(sizes); int result = KSFT_PASS; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); @@ -132,7 +131,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) { char *ptr, *map_ptr; int run, prot_flag, result, fd, map_size; - int total = sizeof(sizes)/sizeof(int); + int total = ARRAY_SIZE(sizes); prot_flag = PROT_READ | PROT_WRITE; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); @@ -187,7 +186,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) int main(int argc, char *argv[]) { int err; - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); err = mte_default_setup(); if (err) diff --git a/tools/testing/selftests/arm64/signal/testcases/TODO b/tools/testing/selftests/arm64/signal/testcases/TODO index 110ff9fd195d..1f7fba8194fe 100644 --- a/tools/testing/selftests/arm64/signal/testcases/TODO +++ b/tools/testing/selftests/arm64/signal/testcases/TODO @@ -1,2 +1 @@ - Validate that register contents are saved and restored as expected. -- Support and validate extra_context. diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index e1c625b20ac4..d2eda7b5de26 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -1,5 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2019 ARM Limited */ + +#include <ctype.h> +#include <string.h> + #include "testcases.h" struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, @@ -109,7 +113,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) bool terminated = false; size_t offs = 0; int flags = 0; - int new_flags; + int new_flags, i; struct extra_context *extra = NULL; struct sve_context *sve = NULL; struct za_context *za = NULL; @@ -117,6 +121,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; void *extra_data = NULL; size_t extra_sz = 0; + char magic[4]; if (!err) return false; @@ -194,11 +199,19 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) /* * A still unknown Magic: potentially freshly added * to the Kernel code and still unknown to the - * tests. + * tests. Magic numbers are supposed to be allocated + * as somewhat meaningful ASCII strings so try to + * print as such as well as the raw number. */ + memcpy(magic, &head->magic, sizeof(magic)); + for (i = 0; i < sizeof(magic); i++) + if (!isalnum(magic[i])) + magic[i] = '?'; + fprintf(stdout, - "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n", - head->magic); + "SKIP Unknown MAGIC: 0x%X (%c%c%c%c) - Is KSFT arm64/signal up to date ?\n", + head->magic, + magic[3], magic[2], magic[1], magic[0]); break; } diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index 939de574fc7f..f748f2c33b22 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ b/tools/testing/selftests/bpf/DENYLIST @@ -1,6 +1,7 @@ # TEMPORARY +# Alphabetical order get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge -stacktrace_build_id_nmi stacktrace_build_id +stacktrace_build_id_nmi task_fd_query_rawtp varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 new file mode 100644 index 000000000000..99cc33c51eaa --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -0,0 +1,84 @@ +bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 +bpf_cookie/lsm +bpf_cookie/multi_kprobe_attach_api +bpf_cookie/multi_kprobe_link_api +bpf_cookie/trampoline +bpf_loop/check_callback_fn_stop # link unexpected error: -524 +bpf_loop/check_invalid_flags +bpf_loop/check_nested_calls +bpf_loop/check_non_constant_callback +bpf_loop/check_nr_loops +bpf_loop/check_null_callback_ctx +bpf_loop/check_stack +bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) +bpf_tcp_ca/dctcp_fallback +btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 +cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) +d_path/basic # setup attach failed: -524 +deny_namespace # attach unexpected error: -524 (errno 524) +fentry_fexit # fentry_attach unexpected error: -1 (errno 524) +fentry_test # fentry_attach unexpected error: -1 (errno 524) +fexit_sleep # fexit_attach fexit attach failed: -1 +fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 +fexit_test # fexit_attach unexpected error: -1 (errno 524) +get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) +htab_update/reenter_update +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF +kfunc_call/subprog_lskel # skel unexpected error: -2 +kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 +kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) +ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF +ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 +libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) +linked_list +lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) +lru_bug # lru_bug__attach unexpected error: -524 (errno 524) +modify_return # modify_return__attach failed unexpected error: -524 (errno 524) +module_attach # skel_attach skeleton attach failed: -524 +mptcp/base # run_test mptcp unexpected error: -524 (errno 524) +netcnt # packets unexpected packets: actual 10001 != expected 10000 +rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 +recursion # skel_attach unexpected error: -524 (errno 524) +ringbuf # skel_attach skeleton attachment failed: -1 +setget_sockopt # attach_cgroup unexpected error: -524 +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) +socket_cookie # prog_attach unexpected error: -524 +stacktrace_build_id # compare_stack_ips stackmap vs. stack_amap err -1 errno 2 +task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) +task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) +test_bprm_opts # attach attach failed: -524 +test_ima # attach attach failed: -524 +test_local_storage # attach lsm attach failed: -524 +test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) +test_overhead # attach_fentry unexpected error: -524 +timer # timer unexpected error: -524 (errno 524) +timer_crash # timer_crash__attach unexpected error: -524 (errno 524) +timer_mim # timer_mim unexpected error: -524 (errno 524) +trace_printk # trace_printk__attach unexpected error: -1 (errno 524) +trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) +tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) +trampoline_count # attach_prog unexpected error: -524 +unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_producer_wrong_offset +user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz +user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_sample_full_ring_buffer +user_ringbuf/test_user_ringbuf_post_alignment_autoadjust +user_ringbuf/test_user_ringbuf_overfill +user_ringbuf/test_user_ringbuf_discards_properly_ignored +user_ringbuf/test_user_ringbuf_loop +user_ringbuf/test_user_ringbuf_msg_protocol +user_ringbuf/test_user_ringbuf_blocking_reserve +verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) +vmlinux # skel_attach skeleton attach failed: -524 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 17e074eb42b8..585fcf73c731 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,13 +1,20 @@ # TEMPORARY +# Alphabetical order atomics # attach(add): actual -524 <= expected 0 (trampoline) -bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) -bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_cookie # failed to open_and_load program: -524 (trampoline) +bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) bpf_loop # attaches to __x64_sys_nanosleep bpf_mod_race # BPF trampoline bpf_nf # JIT does not support calling kernel function +bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +cb_refs # expected error message unexpected error: -524 (trampoline) +cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) +cgrp_kfunc # JIT does not support calling kernel function +cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) core_read_macros # unknown func bpf_probe_read#4 (overlapping) d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) fentry_fexit # fentry attach failed: -524 (trampoline) fentry_test # fentry_first_attach unexpected error: -524 (trampoline) @@ -18,19 +25,31 @@ fexit_test # fexit_first_attach unexpected error: get_func_args_test # trampoline get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) kfree_skb # attach fentry unexpected error: -524 (trampoline) kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) +kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) +kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test # relies on fentry ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) +libbpf_get_fd_by_id_opts # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +linked_list # JIT does not support calling kernel function (kfunc) +lookup_key # JIT does not support calling kernel function (kfunc) +lru_bug # prog 'printk': failed to auto-attach: -524 +map_kptr # failed to open_and_load program: -524 (trampoline) modify_return # modify_return attach failed: -524 (trampoline) module_attach # skel_attach skeleton attach failed: -524 (trampoline) mptcp -kprobe_multi_test # relies on fentry netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) probe_user # check_kprobe_res wrong kprobe res from probe read (?) +rcu_read_lock # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) recursion # skel_attach unexpected error: -524 (trampoline) ringbuf # skel_load skeleton load failed (?) +select_reuseport # intermittently fails on new s390x setup +send_signal # intermittently fails to receive signal +setget_sockopt # attach unexpected error: -524 (trampoline) sk_assign # Can't read on server: Invalid argument (?) sk_lookup # endianness problem sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) @@ -38,6 +57,7 @@ skc_to_unix_sock # could not attach BPF object unexpecte socket_cookie # prog_attach unexpected error: -524 (trampoline) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_kfunc # JIT does not support calling kernel function task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) test_bpffs # bpffs test failed 255 (iterator) test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) @@ -52,26 +72,17 @@ timer_mim # failed to auto-attach program 'test1' trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) +tracing_struct # failed to auto-attach: -524 (trampoline) trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +type_cast # JIT does not support calling kernel function +unpriv_bpf_disabled # fentry +user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) +verify_pkcs7_sig # JIT does not support calling kernel function (kfunc) vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) -map_kptr # failed to open_and_load program: -524 (trampoline) -bpf_cookie # failed to open_and_load program: -524 (trampoline) xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) -send_signal # intermittently fails to receive signal -select_reuseport # intermittently fails on new s390x setup xdp_synproxy # JIT does not support calling kernel function (kfunc) -unpriv_bpf_disabled # fentry -lru_bug # prog 'printk': failed to auto-attach: -524 -setget_sockopt # attach unexpected error: -524 (trampoline) -cb_refs # expected error message unexpected error: -524 (trampoline) -cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) -htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) -tracing_struct # failed to auto-attach: -524 (trampoline) -user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) -lookup_key # JIT does not support calling kernel function (kfunc) -verify_pkcs7_sig # JIT does not support calling kernel function (kfunc) -kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) +xfrm_info # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e6cf21fad69f..c22c43bbee19 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -182,14 +182,15 @@ endif $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ - -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ liburandom_read.so $(LDLIBS) \ - -fuse-ld=$(LLD) -Wl,-znoseparate-code \ - -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -Wl,-rpath=. -o $@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(call msg,SIGN-FILE,,$@) @@ -200,7 +201,7 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_testmod + $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool @@ -309,9 +310,9 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ # Use '-idirafter': Don't interfere with include mechanics except where the # build would have failed anyways. define get_sys_includes -$(shell $(1) -v -E - </dev/null 2>&1 \ +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') endef # Determine target endianness. @@ -319,7 +320,11 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) -CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) @@ -359,9 +364,11 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := fentry_test.c fexit_test.c fexit_sleep.c \ - test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ - map_ptr_kern.c core_kern.c core_kern_overflow.c +LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ + trace_printk.c trace_vprintk.c map_ptr_kern.c \ + core_kern.c core_kern_overflow.c test_ringbuf.c \ + test_ringbuf_map_key.c + # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ kfunc_call_test_subprog.c @@ -520,13 +527,15 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c + cap_helpers.c test_loader.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ - ima_setup.sh verify_sig_setup.sh \ - $(wildcard progs/btf_dump_test_case_*.c) + ima_setup.sh \ + verify_sig_setup.sh \ + $(wildcard progs/btf_dump_test_case_*.c) \ + $(wildcard progs/*.bpf.o) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs)) @@ -539,7 +548,7 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc,) $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc)) endif diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index d3c6b3da0bb1..cb9b95702ac6 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -6,18 +6,59 @@ General instructions on running selftests can be found in __ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests +============= +BPF CI System +============= + +BPF employs a continuous integration (CI) system to check patch submission in an +automated fashion. The system runs selftests for each patch in a series. Results +are propagated to patchwork, where failures are highlighted similar to +violations of other checks (such as additional warnings being emitted or a +``scripts/checkpatch.pl`` reported deficiency): + + https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173 + +The CI system executes tests on multiple architectures. It uses a kernel +configuration derived from both the generic and architecture specific config +file fragments below ``tools/testing/selftests/bpf/`` (e.g., ``config`` and +``config.x86_64``). + +Denylisting Tests +================= + +It is possible for some architectures to not have support for all BPF features. +In such a case tests in CI may fail. An example of such a shortcoming is BPF +trampoline support on IBM's s390x architecture. For cases like this, an in-tree +deny list file, located at ``tools/testing/selftests/bpf/DENYLIST.<arch>``, can +be used to prevent the test from running on such an architecture. + +In addition to that, the generic ``tools/testing/selftests/bpf/DENYLIST`` is +honored on every architecture running tests. + +These files are organized in three columns. The first column lists the test in +question. This can be the name of a test suite or of an individual test. The +remaining two columns provide additional meta data that helps identify and +classify the entry: column two is a copy and paste of the error being reported +when running the test in the setting in question. The third column, if +available, summarizes the underlying problem. A value of ``trampoline``, for +example, indicates that lack of trampoline support is causing the test to fail. +This last entry helps identify tests that can be re-enabled once such support is +added. + ========================= Running Selftests in a VM ========================= It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``. The script tries to ensure that the tests are run with the same environment as they -would be run post-submit in the CI used by the Maintainers. +would be run post-submit in the CI used by the Maintainers, with the exception +that deny lists are not automatically honored. -This script downloads a suitable Kconfig and VM userspace image from the system used by -the CI. It builds the kernel (without overwriting your existing Kconfig), recompiles the -bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and -saves the resulting output (by default in ``~/.bpf_selftests``). +This script uses the in-tree kernel configuration and downloads a VM userspace +image from the system used by the CI. It builds the kernel (without overwriting +your existing Kconfig), recompiles the bpf selftests, runs them (by default +``tools/testing/selftests/bpf/test_progs``) and saves the resulting output (by +default in ``~/.bpf_selftests``). Script dependencies: - clang (preferably built from sources, https://github.com/llvm/llvm-project); @@ -26,7 +67,7 @@ Script dependencies: - docutils (for ``rst2man``); - libcap-devel. -For more information on about using the script, run: +For more information about using the script, run: .. code-block:: console diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h new file mode 100644 index 000000000000..424f7bbbfe9b --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -0,0 +1,68 @@ +#ifndef __BPF_EXPERIMENTAL__ +#define __BPF_EXPERIMENTAL__ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node))) + +/* Description + * Allocates an object of the type represented by 'local_type_id' in + * program BTF. User may use the bpf_core_type_id_local macro to pass the + * type ID of a struct in program BTF. + * + * The 'local_type_id' parameter must be a known constant. + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * A pointer to an object of the type corresponding to the passed in + * 'local_type_id', or NULL on failure. + */ +extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_obj_new_impl */ +#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) + +/* Description + * Free an allocated object. All fields of the object that require + * destruction will be destructed before the storage is freed. + * + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * Void. + */ +extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_obj_drop_impl */ +#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) + +/* Description + * Add a new entry to the beginning of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Add a new entry to the end of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Remove the entry at the beginning of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. + */ +extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; + +/* Description + * Remove the entry at the end of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. + */ +extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; + +#endif diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h index 845209581440..bc4555a003a7 100644 --- a/tools/testing/selftests/bpf/bpf_legacy.h +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -2,15 +2,22 @@ #ifndef __BPF_LEGACY__ #define __BPF_LEGACY__ +#if __GNUC__ && !__clang__ +/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions. We + * provide the "standard" names as synonyms of the corresponding GCC + * builtins. Note how the SKB argument is ignored. + */ +#define load_byte(skb, off) __builtin_bpf_load_byte(off) +#define load_half(skb, off) __builtin_bpf_load_half(off) +#define load_word(skb, off) __builtin_bpf_load_word(off) +#else /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */ -unsigned long long load_byte(void *skb, - unsigned long long off) asm("llvm.bpf.load.byte"); -unsigned long long load_half(void *skb, - unsigned long long off) asm("llvm.bpf.load.half"); -unsigned long long load_word(void *skb, - unsigned long long off) asm("llvm.bpf.load.word"); +unsigned long long load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word"); +#endif #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a6021d6117b5..5085fea3cac5 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -128,6 +128,23 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg) } } +noinline int bpf_testmod_fentry_test1(int a) +{ + return a + 1; +} + +noinline int bpf_testmod_fentry_test2(int a, u64 b) +{ + return a + b; +} + +noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) +{ + return a + b + c; +} + +int bpf_testmod_fentry_ok; + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -167,6 +184,13 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, return snprintf(buf, len, "%d\n", writable.val); } + if (bpf_testmod_fentry_test1(1) != 2 || + bpf_testmod_fentry_test2(2, 3) != 5 || + bpf_testmod_fentry_test3(4, 5, 6) != 15) + goto out; + + bpf_testmod_fentry_ok = 1; +out: return -EIO; /* always fail */ } EXPORT_SYMBOL(bpf_testmod_test_read); diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index a3352a64c067..10587a29b967 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -20,6 +20,25 @@ static inline unsigned int bpf_num_possible_cpus(void) return possible_cpus; } +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. + */ +static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + #define __bpf_percpu_val_align __attribute__((__aligned__(8))) #define BPF_DECLARE_PERCPU(type, name) \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e914cc45b766..9e95b37a7dff 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -13,6 +13,7 @@ #include <ftw.h> #include "cgroup_helpers.h" +#include "bpf_util.h" /* * To avoid relying on the system setup, when setup_cgroup_env is called @@ -77,7 +78,7 @@ static int __enable_controllers(const char *cgroup_path, const char *controllers enable[len] = 0; close(fd); } else { - strncpy(enable, controllers, sizeof(enable)); + bpf_strlcpy(enable, controllers, sizeof(enable)); } snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); @@ -332,6 +333,25 @@ int get_root_cgroup(void) return fd; } +/* + * remove_cgroup() - Remove a cgroup + * @relative_path: The cgroup path, relative to the workdir, to remove + * + * This function expects a cgroup to already be created, relative to the cgroup + * work dir. It also expects the cgroup doesn't have any children or live + * processes and it removes the cgroup. + * + * On failure, it will print an error to stderr. + */ +void remove_cgroup(const char *relative_path) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + if (rmdir(cgroup_path)) + log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); +} + /** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD * @relative_path: The cgroup path, relative to the workdir, to join diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 3358734356ab..f099a166c94d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -18,6 +18,7 @@ int write_cgroup_file_parent(const char *relative_path, const char *file, int cgroup_setup_and_join(const char *relative_path); int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); +void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); int join_cgroup(const char *relative_path); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 9213565c0311..612f699dc4f7 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,4 +1,6 @@ CONFIG_BLK_DEV_LOOP=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y @@ -6,6 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y CONFIG_BPF_SYSCALL=y +# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y @@ -20,6 +23,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_IMA=y CONFIG_IMA_READ_POLICY=y CONFIG_IMA_WRITE_POLICY=y +CONFIG_INET_ESP=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_RAW=y CONFIG_IP_NF_TARGET_SYNPROXY=y @@ -67,7 +71,8 @@ CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y -CONFIG_TEST_BPF=y +CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y +CONFIG_XFRM_INTERFACE=y diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 new file mode 100644 index 000000000000..1f0437644186 --- /dev/null +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -0,0 +1,181 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_ARM_SMMU_V3=y +CONFIG_ATA=y +CONFIG_AUDIT=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_SD=y +CONFIG_BONDING=y +CONFIG_BPFILTER=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_BRIDGE=m +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CHR_DEV_SG=y +CONFIG_COMPAT=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM=y +CONFIG_DUMMY=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FANOTIFY=y +CONFIG_FB=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_FUSE_FS=y +CONFIG_FW_CFG_SYSFS_CMDLINE=y +CONFIG_FW_CFG_SYSFS=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET_ESP=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MAILBOX=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_NAMESPACES=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NF_TABLES=y +CONFIG_NLMON=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_OVERLAY_FS=y +CONFIG_PACKET_DIAG=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_PL320_MBOX=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SCSI=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TCG_TIS=y +CONFIG_TCG_TPM=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_FS=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index f8a7a258a718..d49f6170e7bd 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -82,9 +82,6 @@ CONFIG_MARCH_Z196_TUNE=y CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULES=y CONFIG_NAMESPACES=y CONFIG_NET=y CONFIG_NET_9P=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 21ce5ea4304e..dd97d61d325c 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -18,7 +18,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_KPROBE_OVERRIDE=y diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 099eb4dfd4f7..18405c3b7cee 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -458,7 +458,7 @@ static void test_sk_storage_map_basic(void) struct { int cnt; int lock; - } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; + } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value; struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; @@ -483,38 +483,41 @@ static void test_sk_storage_map_basic(void) "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST | BPF_F_LOCK); CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); CHECK(err, "bpf_map_update_elem(BPF_EXIST)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Update with BPF_NOEXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST | BPF_F_LOCK); CHECK(!err || errno != EEXIST, @@ -526,22 +529,23 @@ static void test_sk_storage_map_basic(void) value.cnt -= 1; err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt again and update with map_flags == 0 */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &sk_fd); diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index bec15558fd93..01de33191226 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -390,45 +390,6 @@ struct nstoken { int orig_netns_fd; }; -static int setns_by_fd(int nsfd) -{ - int err; - - err = setns(nsfd, CLONE_NEWNET); - close(nsfd); - - if (!ASSERT_OK(err, "setns")) - return err; - - /* Switch /sys to the new namespace so that e.g. /sys/class/net - * reflects the devices in the new namespace. - */ - err = unshare(CLONE_NEWNS); - if (!ASSERT_OK(err, "unshare")) - return err; - - /* Make our /sys mount private, so the following umount won't - * trigger the global umount in case it's shared. - */ - err = mount("none", "/sys", NULL, MS_PRIVATE, NULL); - if (!ASSERT_OK(err, "remount private /sys")) - return err; - - err = umount2("/sys", MNT_DETACH); - if (!ASSERT_OK(err, "umount2 /sys")) - return err; - - err = mount("sysfs", "/sys", "sysfs", 0, NULL); - if (!ASSERT_OK(err, "mount /sys")) - return err; - - err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL); - if (!ASSERT_OK(err, "mount /sys/fs/bpf")) - return err; - - return 0; -} - struct nstoken *open_netns(const char *name) { int nsfd; @@ -449,8 +410,9 @@ struct nstoken *open_netns(const char *name) if (!ASSERT_GE(nsfd, 0, "open netns fd")) goto fail; - err = setns_by_fd(nsfd); - if (!ASSERT_OK(err, "setns_by_fd")) + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + if (!ASSERT_OK(err, "setns")) goto fail; return token; @@ -461,6 +423,7 @@ fail: void close_netns(struct nstoken *token) { - ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd"); + ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns"); + close(token->orig_netns_fd); free(token); } diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 970f09156eb4..4666f88f2bb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -2,7 +2,7 @@ #include <test_progs.h> #define MAX_INSNS 512 -#define MAX_MATCHES 16 +#define MAX_MATCHES 24 struct bpf_reg_match { unsigned int line; @@ -267,6 +267,7 @@ static struct bpf_align_test tests[] = { */ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), @@ -280,6 +281,7 @@ static struct bpf_align_test tests[] = { BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), @@ -311,44 +313,52 @@ static struct bpf_align_test tests[] = { {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Variable offset is added to R5 packet pointer, - * resulting in auxiliary alignment of 4. + * resulting in auxiliary alignment of 4. To avoid BPF + * verifier's precision backtracking logging + * interfering we also have a no-op R4 = R5 + * instruction to validate R5 state. We also check + * that R4 is what it should be in such case. */ - {17, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {18, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte * aligned, so the total offset is 4-byte aligned and * meets the load's requirements. */ - {23, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {23, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {25, "R5_w=pkt(off=14,r=8"}, + {26, "R5_w=pkt(off=14,r=8"}, /* Variable offset is added to R5, resulting in a - * variable offset of (4n). + * variable offset of (4n). See comment for insn #18 + * for R4 = R5 trick. */ - {26, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {27, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {28, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {33, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, - {33, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, }, }, { @@ -681,6 +691,6 @@ void test_align(void) if (!test__start_subtest(test->descr)) continue; - CHECK_FAIL(do_test_single(test)); + ASSERT_OK(do_test_single(test), test->descr); } } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 3369c5ec3a17..6f8ed61fc4b4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -3,6 +3,7 @@ #include <test_progs.h> #include <unistd.h> #include <sys/syscall.h> +#include <task_local_storage_helpers.h> #include "bpf_iter_ipv6_route.skel.h" #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" @@ -175,11 +176,6 @@ static void test_bpf_map(void) bpf_iter_bpf_map__destroy(skel); } -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(SYS_pidfd_open, pid, flags); -} - static void check_bpf_link_info(const struct bpf_program *prog) { LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -295,8 +291,8 @@ static void test_task_pidfd(void) union bpf_iter_link_info linfo; int pidfd; - pidfd = pidfd_open(getpid(), 0); - if (!ASSERT_GT(pidfd, 0, "pidfd_open")) + pidfd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_GT(pidfd, 0, "sys_pidfd_open")) return; memset(&linfo, 0, sizeof(linfo)); @@ -945,10 +941,10 @@ static void test_bpf_array_map(void) { __u64 val, expected_val = 0, res_first_val, first_val = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); - __u32 expected_key = 0, res_first_key; + __u32 key, expected_key = 0, res_first_key; + int err, i, map_fd, hash_fd, iter_fd; struct bpf_iter_bpf_array_map *skel; union bpf_iter_link_info linfo; - int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; int len, start; @@ -1005,12 +1001,20 @@ static void test_bpf_array_map(void) if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; + hash_fd = bpf_map__fd(skel->maps.hashmap1); for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { err = bpf_map_lookup_elem(map_fd, &i, &val); - if (!ASSERT_OK(err, "map_lookup")) - goto out; - if (!ASSERT_EQ(i, val, "invalid_val")) - goto out; + if (!ASSERT_OK(err, "map_lookup arraymap1")) + goto close_iter; + if (!ASSERT_EQ(i, val, "invalid_val arraymap1")) + goto close_iter; + + val = i + 4; + err = bpf_map_lookup_elem(hash_fd, &val, &key); + if (!ASSERT_OK(err, "map_lookup hashmap1")) + goto close_iter; + if (!ASSERT_EQ(key, val - 4, "invalid_val hashmap1")) + goto close_iter; } close_iter: @@ -1498,7 +1502,6 @@ static noinline int trigger_func(int arg) static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc) { struct bpf_iter_vma_offset *skel; - struct bpf_link *link; char buf[16] = {}; int iter_fd, len; int pgsz, shift; @@ -1513,11 +1516,11 @@ static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool ; skel->bss->page_shift = shift; - link = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); - if (!ASSERT_OK_PTR(link, "attach_iter")) - return; + skel->links.get_vma_offset = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); + if (!ASSERT_OK_PTR(skel->links.get_vma_offset, "attach_iter")) + goto exit; - iter_fd = bpf_iter_create(bpf_link__fd(link)); + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.get_vma_offset)); if (!ASSERT_GT(iter_fd, 0, "create_iter")) goto exit; @@ -1535,7 +1538,7 @@ static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool close(iter_fd); exit: - bpf_link__destroy(link); + bpf_iter_vma_offset__destroy(skel); } static void test_task_vma_offset(void) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 8a838ea8bdf3..c8ba4009e4ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -49,14 +49,14 @@ out: static void test_bpf_nf_ct(int mode) { - const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; + const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; int srv_fd = -1, client_fd = -1, srv_client_fd = -1; struct sockaddr_in peer_addr = {}; struct test_bpf_nf *skel; int prog_fd, err; socklen_t len; u16 srv_port; - char cmd[64]; + char cmd[128]; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), @@ -69,7 +69,7 @@ static void test_bpf_nf_ct(int mode) /* Enable connection tracking */ snprintf(cmd, sizeof(cmd), iptables, "-A"); - if (!ASSERT_OK(system(cmd), "iptables")) + if (!ASSERT_OK(system(cmd), cmd)) goto end; srv_port = (mode == TEST_XDP) ? 5005 : 5006; diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 24dd6214394e..de1b5b9eb93a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3949,6 +3949,20 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid return type", }, { + .descr = "decl_tag test #17, func proto, argument", + .raw_types = { + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 4), (-1), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), /* [2] */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag1\0var"), + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, +{ .descr = "type_tag test #1", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -7133,7 +7147,7 @@ static struct btf_dedup_test dedup_tests[] = { BTF_ENUM_ENC(NAME_NTH(4), 456), /* [4] fwd enum 'e2' after full enum */ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), - /* [5] incompatible fwd enum with different size */ + /* [5] fwd enum with different size, size does not matter for fwd */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), /* [6] incompatible full enum with different value */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), @@ -7150,9 +7164,7 @@ static struct btf_dedup_test dedup_tests[] = { /* [2] full enum 'e2' */ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), BTF_ENUM_ENC(NAME_NTH(4), 456), - /* [3] incompatible fwd enum with different size */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), - /* [4] incompatible full enum with different value */ + /* [3] incompatible full enum with different value */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), BTF_ENUM_ENC(NAME_NTH(2), 321), BTF_END_RAW, @@ -7611,7 +7623,263 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0e1\0e1_val"), }, }, - +{ + .descr = "dedup: enum of different size: no dedup", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2), + BTF_ENUM_ENC(NAME_NTH(2), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2), + BTF_ENUM_ENC(NAME_NTH(2), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, +{ + .descr = "dedup: enum fwd to enum64", + .input = { + .raw_types = { + /* [1] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + /* [2] enum 'e1' fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [3] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, + .expect = { + .raw_types = { + /* [1] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + /* [2] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, +}, +{ + .descr = "dedup: enum64 fwd to enum", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [3] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration struct", + /* + * Verify that CU1:foo and CU2:foo would be unified and that + * typedef/ptr would be updated to point to CU1:foo. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * struct foo; + * typedef struct foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_PTR_ENC(1), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration union", + /* + * Verify that CU1:foo and CU2:foo would be unified and that + * typedef/ptr would be updated to point to CU1:foo. + * Same as "dedup: standalone fwd declaration struct" but for unions. + * + * // CU 1: + * union foo { int x; }; + * + * // CU 2: + * union foo; + * typedef union foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_TBD, 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_PTR_ENC(1), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration wrong kind", + /* + * Negative test for btf_dedup_resolve_fwds: + * - CU1:foo is a struct, C2:foo is a union, thus CU2:foo is not deduped; + * - typedef/ptr should remain unchanged as well. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * union foo; + * typedef union foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration name conflict", + /* + * Negative test for btf_dedup_resolve_fwds: + * - two candidates for CU2:foo dedup, thus it is unchanged; + * - typedef/ptr should remain unchanged as well. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * struct foo; + * typedef struct foo *foo_ptr; + * + * // CU 3: + * struct foo { int x; int y; }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */ + /* CU 3 */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */ + /* CU 3 */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"), + }, +}, }; static int btf_type_size(const struct btf_type *t) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index 90aac437576d..d9024c7a892a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -143,6 +143,10 @@ static void test_split_fwd_resolve() { btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ /* } */ + /* keep this not a part of type the graph to test btf_dedup_resolve_fwds */ + btf__add_struct(btf1, "s3", 4); /* [6] struct s3 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ VALIDATE_RAW_BTF( btf1, @@ -153,20 +157,24 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=2 bits_offset=0\n" "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[6] STRUCT 's3' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0"); btf2 = btf__new_empty_split(btf1); if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) goto cleanup; - btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */ - btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */ - btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */ - btf__add_ptr(btf2, 8); /* [9] ptr to fwd struct s2 */ - btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */ - btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ - btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [7] int */ + btf__add_ptr(btf2, 11); /* [8] ptr to struct s1 */ + btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [9] fwd for struct s2 */ + btf__add_ptr(btf2, 9); /* [10] ptr to fwd struct s2 */ + btf__add_struct(btf2, "s1", 16); /* [11] struct s1 { */ + btf__add_field(btf2, "f1", 8, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 10, 64, 0); /* struct s2 *f2; */ /* } */ + btf__add_fwd(btf2, "s3", BTF_FWD_STRUCT); /* [12] fwd for struct s3 */ + btf__add_ptr(btf2, 12); /* [13] ptr to struct s1 */ VALIDATE_RAW_BTF( btf2, @@ -178,13 +186,17 @@ static void test_split_fwd_resolve() { "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0", - "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[7] PTR '(anon)' type_id=10", - "[8] FWD 's2' fwd_kind=struct", - "[9] PTR '(anon)' type_id=8", - "[10] STRUCT 's1' size=16 vlen=2\n" - "\t'f1' type_id=7 bits_offset=0\n" - "\t'f2' type_id=9 bits_offset=64"); + "[6] STRUCT 's3' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[7] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[8] PTR '(anon)' type_id=11", + "[9] FWD 's2' fwd_kind=struct", + "[10] PTR '(anon)' type_id=9", + "[11] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=8 bits_offset=0\n" + "\t'f2' type_id=10 bits_offset=64", + "[12] FWD 's3' fwd_kind=struct", + "[13] PTR '(anon)' type_id=12"); err = btf__dedup(btf2, NULL); if (!ASSERT_OK(err, "btf_dedup")) @@ -199,7 +211,10 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=2 bits_offset=0\n" "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" - "\t'f1' type_id=1 bits_offset=0"); + "\t'f1' type_id=1 bits_offset=0", + "[6] STRUCT 's3' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[7] PTR '(anon)' type_id=6"); cleanup: btf__free(btf2); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 24da335482d4..0ba2e8b9c6ac 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -791,11 +791,11 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, sizeof(struct bpf_sock_ops) - 1, "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", - { .op = 1, .skb_tcp_flags = 2}); + { .op = 1, .skb_hwtstamp = 2}); TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, sizeof(struct bpf_sock_ops) - 1, "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", - { .op = 1, .skb_tcp_flags = 0}); + { .op = 1, .skb_hwtstamp = 0}); } static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 7a277035c275..ef4d6a3ae423 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -9,6 +9,7 @@ #include <string.h> #include <errno.h> #include <sched.h> +#include <net/if.h> #include <linux/compiler.h> #include <bpf/libbpf.h> @@ -20,10 +21,12 @@ static struct test_btf_skc_cls_ingress *skel; static struct sockaddr_in6 srv_sa6; static __u32 duration; -#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" - static int prepare_netns(void) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach, + .prog_fd = bpf_program__fd(skel->progs.cls_ingress)); + if (CHECK(unshare(CLONE_NEWNET), "create netns", "unshare(CLONE_NEWNET): %s (%d)", strerror(errno), errno)) @@ -33,12 +36,12 @@ static int prepare_netns(void) "ip link set dev lo up", "failed\n")) return -1; - if (CHECK(system("tc qdisc add dev lo clsact"), - "tc qdisc add dev lo clsact", "failed\n")) + qdisc_lo.ifindex = if_nametoindex("lo"); + if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) return -1; - if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE), - "install tc cls-prog at ingress", "failed\n")) + if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), + "filter add dev lo ingress")) return -1; /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the @@ -195,19 +198,12 @@ static struct test tests[] = { void test_btf_skc_cls_ingress(void) { - int i, err; + int i; skel = test_btf_skc_cls_ingress__open_and_load(); if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) return; - err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE); - if (CHECK(err, "bpf_program__pin", - "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) { - test_btf_skc_cls_ingress__destroy(skel); - return; - } - for (i = 0; i < ARRAY_SIZE(tests); i++) { if (!test__start_subtest(tests[i].desc)) continue; @@ -221,6 +217,5 @@ void test_btf_skc_cls_ingress(void) reset_test(); } - bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE); test_btf_skc_cls_ingress__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c index c4a2adb38da1..e02feb5fae97 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c @@ -189,6 +189,80 @@ static void test_walk_self_only(struct cgroup_iter *skel) BPF_CGROUP_ITER_SELF_ONLY, "self_only"); } +static void test_walk_dead_self_only(struct cgroup_iter *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + char expected_output[128], buf[128]; + const char *cgrp_name = "/dead"; + union bpf_iter_link_info linfo; + int len, cgrp_fd, iter_fd; + struct bpf_link *link; + size_t left; + char *p; + + cgrp_fd = create_and_get_cgroup(cgrp_name); + if (!ASSERT_GE(cgrp_fd, 0, "create cgrp")) + return; + + /* The cgroup will be dead during read() iteration, so it only has + * epilogue in the output + */ + snprintf(expected_output, sizeof(expected_output), EPILOGUE); + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgrp_fd; + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto close_cgrp; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto free_link; + + /* Close link fd and cgroup fd */ + bpf_link__destroy(link); + close(cgrp_fd); + + /* Remove cgroup to mark it as dead */ + remove_cgroup(cgrp_name); + + /* Two kern_sync_rcu() and usleep() pairs are used to wait for the + * releases of cgroup css, and the last kern_sync_rcu() and usleep() + * pair is used to wait for the free of cgroup itself. + */ + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(1000); + + memset(buf, 0, sizeof(buf)); + left = ARRAY_SIZE(buf); + p = buf; + while ((len = read(iter_fd, p, left)) > 0) { + p += len; + left -= len; + } + + ASSERT_STREQ(buf, expected_output, "dead cgroup output"); + + /* read() after iter finishes should be ok. */ + if (len == 0) + ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read"); + + close(iter_fd); + return; +free_link: + bpf_link__destroy(link); +close_cgrp: + close(cgrp_fd); +} + void test_cgroup_iter(void) { struct cgroup_iter *skel = NULL; @@ -217,6 +291,8 @@ void test_cgroup_iter(void) test_early_termination(skel); if (test__start_subtest("cgroup_iter__self_only")) test_walk_self_only(skel); + if (test__start_subtest("cgroup_iter__dead_self_only")) + test_walk_dead_self_only(skel); out: cgroup_iter__destroy(skel); cleanup_cgroups(); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c new file mode 100644 index 000000000000..973f0c5af965 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <cgroup_helpers.h> +#include <test_progs.h> + +#include "cgrp_kfunc_failure.skel.h" +#include "cgrp_kfunc_success.skel.h" + +static size_t log_buf_sz = 1 << 20; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void) +{ + struct cgrp_kfunc_success *skel; + int err; + + skel = cgrp_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = cgrp_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + cgrp_kfunc_success__destroy(skel); + return NULL; +} + +static int mkdir_rm_test_dir(void) +{ + int fd; + const char *cgrp_path = "cgrp_kfunc"; + + fd = create_and_get_cgroup(cgrp_path); + if (!ASSERT_GT(fd, 0, "mkdir_cgrp_fd")) + return -1; + + close(fd); + remove_cgroup(cgrp_path); + + return 0; +} + +static void run_success_test(const char *prog_name) +{ + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 0, "pre_rmdir_count"); + if (!ASSERT_OK(mkdir_rm_test_dir(), "cgrp_mkdir")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 1, "post_rmdir_count"); + ASSERT_OK(skel->bss->err, "post_rmdir_err"); + +cleanup: + bpf_link__destroy(link); + cgrp_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_cgrp_acquire_release_argument", + "test_cgrp_acquire_leave_in_map", + "test_cgrp_xchg_release", + "test_cgrp_get_release", + "test_cgrp_get_ancestors", +}; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} failure_tests[] = { + {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or trusted"}, + {"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"}, + {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"}, + {"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_acquire_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_get_null", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_xchg_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_get_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"}, + {"cgrp_kfunc_release_fp", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"cgrp_kfunc_release_unacquired", "release kernel function bpf_cgroup_release expects"}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct cgrp_kfunc_failure *skel; + int err, i; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = cgrp_kfunc_failure__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "cgrp_kfunc_failure__open_opts")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + struct bpf_program *prog; + const char *curr_name = failure_tests[i].prog_name; + + prog = bpf_object__find_program_by_name(skel->obj, curr_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name)); + } + + err = cgrp_kfunc_failure__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + cgrp_kfunc_failure__destroy(skel); +} + +void test_cgrp_kfunc(void) +{ + int i, err; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "cgrp_env_setup")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + if (!test__start_subtest(failure_tests[i].prog_name)) + continue; + + verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); + } + +cleanup: + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c new file mode 100644 index 000000000000..33a2776737e7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/ + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <test_progs.h> +#include "cgrp_ls_tp_btf.skel.h" +#include "cgrp_ls_recursion.skel.h" +#include "cgrp_ls_attach_cgroup.skel.h" +#include "cgrp_ls_negative.skel.h" +#include "cgrp_ls_sleepable.skel.h" +#include "network_helpers.h" +#include "cgroup_helpers.h" + +struct socket_cookie { + __u64 cookie_key; + __u32 cookie_value; +}; + +static void test_tp_btf(int cgroup_fd) +{ + struct cgrp_ls_tp_btf *skel; + long val1 = 1, val2 = 0; + int err; + + skel = cgrp_ls_tp_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + /* populate a value in map_b */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); + if (!ASSERT_OK(err, "map_update_elem")) + goto out; + + /* check value */ + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val2); + if (!ASSERT_OK(err, "map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val2, 1, "map_lookup_elem, invalid val")) + goto out; + + /* delete value */ + err = bpf_map_delete_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd); + if (!ASSERT_OK(err, "map_delete_elem")) + goto out; + + skel->bss->target_pid = syscall(SYS_gettid); + + err = cgrp_ls_tp_btf__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_gettid); + syscall(SYS_gettid); + + skel->bss->target_pid = 0; + + /* 3x syscalls: 1x attach and 2x gettid */ + ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); + ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt"); + ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); +out: + cgrp_ls_tp_btf__destroy(skel); +} + +static void test_attach_cgroup(int cgroup_fd) +{ + int server_fd = 0, client_fd = 0, err = 0; + socklen_t addr_len = sizeof(struct sockaddr_in6); + struct cgrp_ls_attach_cgroup *skel; + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + struct socket_cookie val; + + skel = cgrp_ls_attach_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->links.set_cookie = bpf_program__attach_cgroup( + skel->progs.set_cookie, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach")) + goto out; + + skel->links.update_cookie_sockops = bpf_program__attach_cgroup( + skel->progs.update_cookie_sockops, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach")) + goto out; + + skel->links.update_cookie_tracing = bpf_program__attach( + skel->progs.update_cookie_tracing); + if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach")) + goto out; + + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto close_server_fd; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies), + &cgroup_fd, &val); + if (!ASSERT_OK(err, "map_lookup(socket_cookies)")) + goto close_client_fd; + + err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len); + if (!ASSERT_OK(err, "getsockname")) + goto close_client_fd; + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value"); + +close_client_fd: + close(client_fd); +close_server_fd: + close(server_fd); +out: + cgrp_ls_attach_cgroup__destroy(skel); +} + +static void test_recursion(int cgroup_fd) +{ + struct cgrp_ls_recursion *skel; + int err; + + skel = cgrp_ls_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = cgrp_ls_recursion__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + /* trigger sys_enter, make sure it does not cause deadlock */ + syscall(SYS_gettid); + +out: + cgrp_ls_recursion__destroy(skel); +} + +static void test_negative(void) +{ + struct cgrp_ls_negative *skel; + + skel = cgrp_ls_negative__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "skel_open_and_load")) { + cgrp_ls_negative__destroy(skel); + return; + } +} + +static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct cgrp_ls_sleepable *skel; + struct bpf_link *link; + int err, iter_fd; + char buf[16]; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.cgroup_iter, true); + err = cgrp_ls_sleepable__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgroup_fd; + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.cgroup_iter, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto out; + + /* trigger the program run */ + (void)read(iter_fd, buf, sizeof(buf)); + + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); + + close(iter_fd); +out: + cgrp_ls_sleepable__destroy(skel); +} + +static void test_no_rcu_lock(__u64 cgroup_id) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = cgrp_ls_sleepable__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); +out: + cgrp_ls_sleepable__destroy(skel); +} + +static void test_rcu_lock(void) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + ASSERT_ERR(err, "skel_load"); + + cgrp_ls_sleepable__destroy(skel); +} + +void test_cgrp_local_storage(void) +{ + __u64 cgroup_id; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/cgrp_local_storage"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) + return; + + cgroup_id = get_cgroup_id("/cgrp_local_storage"); + if (test__start_subtest("tp_btf")) + test_tp_btf(cgroup_fd); + if (test__start_subtest("attach_cgroup")) + test_attach_cgroup(cgroup_fd); + if (test__start_subtest("recursion")) + test_recursion(cgroup_fd); + if (test__start_subtest("negative")) + test_negative(); + if (test__start_subtest("cgroup_iter_sleepable")) + test_cgroup_iter_sleepable(cgroup_fd, cgroup_id); + if (test__start_subtest("no_rcu_lock")) + test_no_rcu_lock(cgroup_id); + if (test__start_subtest("rcu_lock")) + test_rcu_lock(); + + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 8fc4e6c02bfd..7faaf6d9e0d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -5,86 +5,16 @@ #include "dynptr_fail.skel.h" #include "dynptr_success.skel.h" -static size_t log_buf_sz = 1048576; /* 1 MB */ -static char obj_log_buf[1048576]; - static struct { const char *prog_name; const char *expected_err_msg; } dynptr_tests[] = { - /* failure cases */ - {"ringbuf_missing_release1", "Unreleased reference id=1"}, - {"ringbuf_missing_release2", "Unreleased reference id=2"}, - {"ringbuf_missing_release_callback", "Unreleased reference id"}, - {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, - {"ringbuf_invalid_api", "type=mem expected=alloc_mem"}, - {"add_dynptr_to_map1", "invalid indirect read from stack"}, - {"add_dynptr_to_map2", "invalid indirect read from stack"}, - {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, - {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, - {"data_slice_use_after_release1", "invalid mem access 'scalar'"}, - {"data_slice_use_after_release2", "invalid mem access 'scalar'"}, - {"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, - {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, - {"invalid_helper1", "invalid indirect read from stack"}, - {"invalid_helper2", "Expected an initialized dynptr as arg #3"}, - {"invalid_write1", "Expected an initialized dynptr as arg #1"}, - {"invalid_write2", "Expected an initialized dynptr as arg #3"}, - {"invalid_write3", "Expected an initialized dynptr as arg #1"}, - {"invalid_write4", "arg 1 is an unacquired reference"}, - {"invalid_read1", "invalid read from stack"}, - {"invalid_read2", "cannot pass in dynptr at an offset"}, - {"invalid_read3", "invalid read from stack"}, - {"invalid_read4", "invalid read from stack"}, - {"invalid_offset", "invalid write to stack"}, - {"global", "type=map_value expected=fp"}, - {"release_twice", "arg 1 is an unacquired reference"}, - {"release_twice_callback", "arg 1 is an unacquired reference"}, - {"dynptr_from_mem_invalid_api", - "Unsupported reg type fp for bpf_dynptr_from_mem data"}, - /* success cases */ {"test_read_write", NULL}, {"test_data_slice", NULL}, {"test_ringbuf", NULL}, }; -static void verify_fail(const char *prog_name, const char *expected_err_msg) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct bpf_program *prog; - struct dynptr_fail *skel; - int err; - - opts.kernel_log_buf = obj_log_buf; - opts.kernel_log_size = log_buf_sz; - opts.kernel_log_level = 1; - - skel = dynptr_fail__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) - goto cleanup; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) - goto cleanup; - - bpf_program__set_autoload(prog, true); - - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); - - err = dynptr_fail__load(skel); - if (!ASSERT_ERR(err, "unexpected load success")) - goto cleanup; - - if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { - fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); - fprintf(stderr, "Verifier output: %s\n", obj_log_buf); - } - -cleanup: - dynptr_fail__destroy(skel); -} - static void verify_success(const char *prog_name) { struct dynptr_success *skel; @@ -97,8 +27,6 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); - dynptr_success__load(skel); if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) goto cleanup; @@ -129,10 +57,8 @@ void test_dynptr(void) if (!test__start_subtest(dynptr_tests[i].prog_name)) continue; - if (dynptr_tests[i].expected_err_msg) - verify_fail(dynptr_tests[i].prog_name, - dynptr_tests[i].expected_err_msg); - else - verify_success(dynptr_tests[i].prog_name); + verify_success(dynptr_tests[i].prog_name); } + + RUN_TESTS(dynptr_fail); } diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c new file mode 100644 index 000000000000..32dd731e9070 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include <net/if.h> +#include "empty_skb.skel.h" + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +void test_empty_skb(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, tattr); + struct empty_skb *bpf_obj = NULL; + struct nstoken *tok = NULL; + struct bpf_program *prog; + char eth_hlen_pp[15]; + char eth_hlen[14]; + int veth_ifindex; + int ipip_ifindex; + int err; + int i; + + struct { + const char *msg; + const void *data_in; + __u32 data_size_in; + int *ifindex; + int err; + int ret; + bool success_on_tc; + } tests[] = { + /* Empty packets are always rejected. */ + + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "veth empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &veth_ifindex, + .err = -EINVAL, + }, + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "ipip empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &ipip_ifindex, + .err = -EINVAL, + }, + + /* ETH_HLEN-sized packets: + * - can not be redirected at LWT_XMIT + * - can be redirected at TC to non-tunneling dest + */ + + { + /* __bpf_redirect_common */ + .msg = "veth ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &veth_ifindex, + .ret = -ERANGE, + .success_on_tc = true, + }, + { + /* __bpf_redirect_no_mac + * + * lwt: skb->len=0 <= skb_network_offset=0 + * tc: skb->len=14 <= skb_network_offset=14 + */ + .msg = "ipip ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &ipip_ifindex, + .ret = -ERANGE, + }, + + /* ETH_HLEN+1-sized packet should be redirected. */ + + { + .msg = "veth ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &veth_ifindex, + }, + { + .msg = "ipip ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &ipip_ifindex, + }, + }; + + SYS("ip netns add empty_skb"); + tok = open_netns("empty_skb"); + SYS("ip link add veth0 type veth peer veth1"); + SYS("ip link set dev veth0 up"); + SYS("ip link set dev veth1 up"); + SYS("ip addr add 10.0.0.1/8 dev veth0"); + SYS("ip addr add 10.0.0.2/8 dev veth1"); + veth_ifindex = if_nametoindex("veth0"); + + SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); + SYS("ip link set ipip0 up"); + SYS("ip addr add 192.168.1.1/16 dev ipip0"); + ipip_ifindex = if_nametoindex("ipip0"); + + bpf_obj = empty_skb__open_and_load(); + if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) + goto out; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + bpf_object__for_each_program(prog, bpf_obj->obj) { + char buf[128]; + bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + + tattr.data_in = tests[i].data_in; + tattr.data_size_in = tests[i].data_size_in; + + tattr.data_size_out = 0; + bpf_obj->bss->ifindex = *tests[i].ifindex; + bpf_obj->bss->ret = 0; + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &tattr); + sprintf(buf, "err: %s [%s]", tests[i].msg, bpf_program__name(prog)); + + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(err, 0, buf); + else + ASSERT_EQ(err, tests[i].err, buf); + sprintf(buf, "ret: %s [%s]", tests[i].msg, bpf_program__name(prog)); + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(bpf_obj->bss->ret, 0, buf); + else + ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + } + } + +out: + if (bpf_obj) + empty_skb__destroy(bpf_obj); + if (tok) + close_netns(tok); + system("ip netns del empty_skb"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c index 4747ab18f97f..d358a223fd2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/hashmap.c +++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c @@ -7,17 +7,18 @@ */ #include "test_progs.h" #include "bpf/hashmap.h" +#include <stddef.h> static int duration = 0; -static size_t hash_fn(const void *k, void *ctx) +static size_t hash_fn(long k, void *ctx) { - return (long)k; + return k; } -static bool equal_fn(const void *a, const void *b, void *ctx) +static bool equal_fn(long a, long b, void *ctx) { - return (long)a == (long)b; + return a == b; } static inline size_t next_pow_2(size_t n) @@ -52,8 +53,8 @@ static void test_hashmap_generic(void) return; for (i = 0; i < ELEM_CNT; i++) { - const void *oldk, *k = (const void *)(long)i; - void *oldv, *v = (void *)(long)(1024 + i); + long oldk, k = i; + long oldv, v = 1024 + i; err = hashmap__update(map, k, v, &oldk, &oldv); if (CHECK(err != -ENOENT, "hashmap__update", @@ -64,20 +65,18 @@ static void test_hashmap_generic(void) err = hashmap__add(map, k, v); } else { err = hashmap__set(map, k, v, &oldk, &oldv); - if (CHECK(oldk != NULL || oldv != NULL, "check_kv", - "unexpected k/v: %p=%p\n", oldk, oldv)) + if (CHECK(oldk != 0 || oldv != 0, "check_kv", + "unexpected k/v: %ld=%ld\n", oldk, oldv)) goto cleanup; } - if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", - (long)k, (long)v, err)) + if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", k, v, err)) goto cleanup; if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", - "failed to find key %ld\n", (long)k)) + "failed to find key %ld\n", k)) goto cleanup; - if (CHECK(oldv != v, "elem_val", - "found value is wrong: %ld\n", (long)oldv)) + if (CHECK(oldv != v, "elem_val", "found value is wrong: %ld\n", oldv)) goto cleanup; } @@ -91,8 +90,8 @@ static void test_hashmap_generic(void) found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { - long k = (long)entry->key; - long v = (long)entry->value; + long k = entry->key; + long v = entry->value; found_msk |= 1ULL << k; if (CHECK(v - k != 1024, "check_kv", @@ -104,8 +103,8 @@ static void test_hashmap_generic(void) goto cleanup; for (i = 0; i < ELEM_CNT; i++) { - const void *oldk, *k = (const void *)(long)i; - void *oldv, *v = (void *)(long)(256 + i); + long oldk, k = i; + long oldv, v = 256 + i; err = hashmap__add(map, k, v); if (CHECK(err != -EEXIST, "hashmap__add", @@ -119,13 +118,13 @@ static void test_hashmap_generic(void) if (CHECK(err, "elem_upd", "failed to update k/v %ld = %ld: %d\n", - (long)k, (long)v, err)) + k, v, err)) goto cleanup; if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", - "failed to find key %ld\n", (long)k)) + "failed to find key %ld\n", k)) goto cleanup; if (CHECK(oldv != v, "elem_val", - "found value is wrong: %ld\n", (long)oldv)) + "found value is wrong: %ld\n", oldv)) goto cleanup; } @@ -139,8 +138,8 @@ static void test_hashmap_generic(void) found_msk = 0; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { - long k = (long)entry->key; - long v = (long)entry->value; + long k = entry->key; + long v = entry->value; found_msk |= 1ULL << k; if (CHECK(v - k != 256, "elem_check", @@ -152,7 +151,7 @@ static void test_hashmap_generic(void) goto cleanup; found_cnt = 0; - hashmap__for_each_key_entry(map, entry, (void *)0) { + hashmap__for_each_key_entry(map, entry, 0) { found_cnt++; } if (CHECK(!found_cnt, "found_cnt", @@ -161,27 +160,25 @@ static void test_hashmap_generic(void) found_msk = 0; found_cnt = 0; - hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) { - const void *oldk, *k; - void *oldv, *v; + hashmap__for_each_key_entry_safe(map, entry, tmp, 0) { + long oldk, k; + long oldv, v; k = entry->key; v = entry->value; found_cnt++; - found_msk |= 1ULL << (long)k; + found_msk |= 1ULL << k; if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "failed to delete k/v %ld = %ld\n", - (long)k, (long)v)) + "failed to delete k/v %ld = %ld\n", k, v)) goto cleanup; if (CHECK(oldk != k || oldv != v, "check_old", "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n", - (long)k, (long)v, (long)oldk, (long)oldv)) + k, v, oldk, oldv)) goto cleanup; if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "unexpectedly deleted k/v %ld = %ld\n", - (long)oldk, (long)oldv)) + "unexpectedly deleted k/v %ld = %ld\n", oldk, oldv)) goto cleanup; } @@ -198,26 +195,24 @@ static void test_hashmap_generic(void) goto cleanup; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { - const void *oldk, *k; - void *oldv, *v; + long oldk, k; + long oldv, v; k = entry->key; v = entry->value; found_cnt++; - found_msk |= 1ULL << (long)k; + found_msk |= 1ULL << k; if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "failed to delete k/v %ld = %ld\n", - (long)k, (long)v)) + "failed to delete k/v %ld = %ld\n", k, v)) goto cleanup; if (CHECK(oldk != k || oldv != v, "elem_check", "invalid old k/v: expect %ld = %ld, got %ld = %ld\n", - (long)k, (long)v, (long)oldk, (long)oldv)) + k, v, oldk, oldv)) goto cleanup; if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "unexpectedly deleted k/v %ld = %ld\n", - (long)k, (long)v)) + "unexpectedly deleted k/v %ld = %ld\n", k, v)) goto cleanup; } @@ -235,7 +230,7 @@ static void test_hashmap_generic(void) hashmap__for_each_entry(map, entry, bkt) { CHECK(false, "elem_exists", "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); + entry->key, entry->value); goto cleanup; } @@ -243,22 +238,107 @@ static void test_hashmap_generic(void) hashmap__for_each_entry(map, entry, bkt) { CHECK(false, "elem_exists", "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); + entry->key, entry->value); + goto cleanup; + } + +cleanup: + hashmap__free(map); +} + +static size_t str_hash_fn(long a, void *ctx) +{ + return str_hash((char *)a); +} + +static bool str_equal_fn(long a, long b, void *ctx) +{ + return strcmp((char *)a, (char *)b) == 0; +} + +/* Verify that hashmap interface works with pointer keys and values */ +static void test_hashmap_ptr_iface(void) +{ + const char *key, *value, *old_key, *old_value; + struct hashmap_entry *cur; + struct hashmap *map; + int err, i, bkt; + + map = hashmap__new(str_hash_fn, str_equal_fn, NULL); + if (CHECK(!map, "hashmap__new", "can't allocate hashmap\n")) goto cleanup; + +#define CHECK_STR(fn, var, expected) \ + CHECK(strcmp(var, (expected)), (fn), \ + "wrong value of " #var ": '%s' instead of '%s'\n", var, (expected)) + + err = hashmap__insert(map, "a", "apricot", HASHMAP_ADD, NULL, NULL); + if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err)) + goto cleanup; + + err = hashmap__insert(map, "a", "apple", HASHMAP_SET, &old_key, &old_value); + if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__update", old_key, "a"); + CHECK_STR("hashmap__update", old_value, "apricot"); + + err = hashmap__add(map, "b", "banana"); + if (CHECK(err, "hashmap__add", "unexpected error: %d\n", err)) + goto cleanup; + + err = hashmap__set(map, "b", "breadfruit", &old_key, &old_value); + if (CHECK(err, "hashmap__set", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__set", old_key, "b"); + CHECK_STR("hashmap__set", old_value, "banana"); + + err = hashmap__update(map, "b", "blueberry", &old_key, &old_value); + if (CHECK(err, "hashmap__update", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__update", old_key, "b"); + CHECK_STR("hashmap__update", old_value, "breadfruit"); + + err = hashmap__append(map, "c", "cherry"); + if (CHECK(err, "hashmap__append", "unexpected error: %d\n", err)) + goto cleanup; + + if (CHECK(!hashmap__delete(map, "c", &old_key, &old_value), + "hashmap__delete", "expected to have entry for 'c'\n")) + goto cleanup; + CHECK_STR("hashmap__delete", old_key, "c"); + CHECK_STR("hashmap__delete", old_value, "cherry"); + + CHECK(!hashmap__find(map, "b", &value), "hashmap__find", "can't find value for 'b'\n"); + CHECK_STR("hashmap__find", value, "blueberry"); + + if (CHECK(!hashmap__delete(map, "b", NULL, NULL), + "hashmap__delete", "expected to have entry for 'b'\n")) + goto cleanup; + + i = 0; + hashmap__for_each_entry(map, cur, bkt) { + if (CHECK(i != 0, "hashmap__for_each_entry", "too many entries")) + goto cleanup; + key = cur->pkey; + value = cur->pvalue; + CHECK_STR("entry", key, "a"); + CHECK_STR("entry", value, "apple"); + i++; } +#undef CHECK_STR cleanup: hashmap__free(map); } -static size_t collision_hash_fn(const void *k, void *ctx) +static size_t collision_hash_fn(long k, void *ctx) { return 0; } static void test_hashmap_multimap(void) { - void *k1 = (void *)0, *k2 = (void *)1; + long k1 = 0, k2 = 1; struct hashmap_entry *entry; struct hashmap *map; long found_msk; @@ -273,23 +353,23 @@ static void test_hashmap_multimap(void) * [0] -> 1, 2, 4; * [1] -> 8, 16, 32; */ - err = hashmap__append(map, k1, (void *)1); + err = hashmap__append(map, k1, 1); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k1, (void *)2); + err = hashmap__append(map, k1, 2); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k1, (void *)4); + err = hashmap__append(map, k1, 4); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)8); + err = hashmap__append(map, k2, 8); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)16); + err = hashmap__append(map, k2, 16); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)32); + err = hashmap__append(map, k2, 32); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; @@ -300,7 +380,7 @@ static void test_hashmap_multimap(void) /* verify global iteration still works and sees all values */ found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (1 << 6) - 1, "found_msk", "not all keys iterated: %lx\n", found_msk)) @@ -309,7 +389,7 @@ static void test_hashmap_multimap(void) /* iterate values for key 1 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k1) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (1 | 2 | 4), "found_msk", "invalid k1 values: %lx\n", found_msk)) @@ -318,7 +398,7 @@ static void test_hashmap_multimap(void) /* iterate values for key 2 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k2) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (8 | 16 | 32), "found_msk", "invalid k2 values: %lx\n", found_msk)) @@ -333,7 +413,7 @@ static void test_hashmap_empty() struct hashmap_entry *entry; int bkt; struct hashmap *map; - void *k = (void *)0; + long k = 0; /* force collisions */ map = hashmap__new(hash_fn, equal_fn, NULL); @@ -374,4 +454,6 @@ void test_hashmap() test_hashmap_multimap(); if (test__start_subtest("empty")) test_hashmap_empty(); + if (test__start_subtest("ptr_iface")) + test_hashmap_ptr_iface(); } diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index c210657d4d0a..a9229260a6ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -18,11 +18,8 @@ static struct { const char *expected_verifier_err_msg; int expected_runtime_err; } kfunc_dynptr_tests[] = { - {"dynptr_type_not_supp", - "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0}, - {"not_valid_dynptr", - "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0}, - {"not_ptr_to_stack", "arg#0 pointer type STRUCT bpf_dynptr_kern not to stack", 0}, + {"not_valid_dynptr", "Expected an initialized dynptr as arg #1", 0}, + {"not_ptr_to_stack", "arg#0 expected pointer to stack or dynptr_ptr", 0}, {"dynptr_data_null", NULL, -EBADMSG}, }; diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index d457a55ff408..c6f37e825f11 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -312,12 +312,12 @@ static inline __u64 get_time_ns(void) return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; } -static size_t symbol_hash(const void *key, void *ctx __maybe_unused) +static size_t symbol_hash(long key, void *ctx __maybe_unused) { return str_hash((const char *) key); } -static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused) +static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) { return strcmp((const char *) key1, (const char *) key2) == 0; } @@ -325,7 +325,7 @@ static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_u static int get_syms(char ***symsp, size_t *cntp) { size_t cap = 0, cnt = 0, i; - char *name, **syms = NULL; + char *name = NULL, **syms = NULL; struct hashmap *map; char buf[256]; FILE *f; @@ -352,16 +352,20 @@ static int get_syms(char ***symsp, size_t *cntp) /* skip modules */ if (strchr(buf, '[')) continue; + + free(name); if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) continue; /* * We attach to almost all kernel functions and some of them * will cause 'suspicious RCU usage' when fprobe is attached * to them. Filter out the current culprits - arch_cpu_idle - * and rcu_* functions. + * default_idle and rcu_* functions. */ if (!strcmp(name, "arch_cpu_idle")) continue; + if (!strcmp(name, "default_idle")) + continue; if (!strncmp(name, "rcu_", 4)) continue; if (!strcmp(name, "bpf_dispatcher_xdp_func")) @@ -369,38 +373,38 @@ static int get_syms(char ***symsp, size_t *cntp) if (!strncmp(name, "__ftrace_invalid_address__", sizeof("__ftrace_invalid_address__") - 1)) continue; - err = hashmap__add(map, name, NULL); - if (err) { - free(name); - if (err == -EEXIST) - continue; + + err = hashmap__add(map, name, 0); + if (err == -EEXIST) + continue; + if (err) goto error; - } + err = libbpf_ensure_mem((void **) &syms, &cap, sizeof(*syms), cnt + 1); - if (err) { - free(name); + if (err) goto error; - } - syms[cnt] = name; - cnt++; + + syms[cnt++] = name; + name = NULL; } *symsp = syms; *cntp = cnt; error: + free(name); fclose(f); hashmap__free(map); if (err) { for (i = 0; i < cnt; i++) - free(syms[cnt]); + free(syms[i]); free(syms); } return err; } -static void test_bench_attach(void) +void serial_test_kprobe_multi_bench_attach(void) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; @@ -468,6 +472,4 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); - if (test__start_subtest("bench_attach")) - test_bench_attach(); } diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c new file mode 100644 index 000000000000..1fbe7e4ac00a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "kprobe_multi.skel.h" +#include "trace_helpers.h" +#include "bpf/libbpf_internal.h" + +static void kprobe_multi_testmod_check(struct kprobe_multi *skel) +{ + ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test2_result, 1, "kprobe_test2_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test3_result, 1, "kprobe_test3_result"); + + ASSERT_EQ(skel->bss->kretprobe_testmod_test1_result, 1, "kretprobe_test1_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test2_result, 1, "kretprobe_test2_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test3_result, 1, "kretprobe_test3_result"); +} + +static void test_testmod_attach_api(struct bpf_kprobe_multi_opts *opts) +{ + struct kprobe_multi *skel = NULL; + + skel = kprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load")) + return; + + skel->bss->pid = getpid(); + + skel->links.test_kprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kprobe_testmod, + NULL, opts); + if (!skel->links.test_kprobe_testmod) + goto cleanup; + + opts->retprobe = true; + skel->links.test_kretprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kretprobe_testmod, + NULL, opts); + if (!skel->links.test_kretprobe_testmod) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + kprobe_multi_testmod_check(skel); + +cleanup: + kprobe_multi__destroy(skel); +} + +static void test_testmod_attach_api_addrs(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + unsigned long long addrs[3]; + + addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1"); + ASSERT_NEQ(addrs[0], 0, "ksym_get_addr"); + addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2"); + ASSERT_NEQ(addrs[1], 0, "ksym_get_addr"); + addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3"); + ASSERT_NEQ(addrs[2], 0, "ksym_get_addr"); + + opts.addrs = (const unsigned long *) addrs; + opts.cnt = ARRAY_SIZE(addrs); + + test_testmod_attach_api(&opts); +} + +static void test_testmod_attach_api_syms(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *syms[3] = { + "bpf_testmod_fentry_test1", + "bpf_testmod_fentry_test2", + "bpf_testmod_fentry_test3", + }; + + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + test_testmod_attach_api(&opts); +} + +void serial_test_kprobe_multi_testmod_test(void) +{ + if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + return; + + if (test__start_subtest("testmod_attach_api_syms")) + test_testmod_attach_api_syms(); + if (test__start_subtest("testmod_attach_api_addrs")) + test_testmod_attach_api_addrs(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c new file mode 100644 index 000000000000..25e5dfa9c315 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include <test_progs.h> + +#include "test_libbpf_get_fd_by_id_opts.skel.h" + +void test_libbpf_get_fd_by_id_opts(void) +{ + struct test_libbpf_get_fd_by_id_opts *skel; + struct bpf_map_info info_m = {}; + __u32 len = sizeof(info_m), value; + int ret, zero = 0, fd = -1; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, fd_opts_rdonly, + .open_flags = BPF_F_RDONLY, + ); + + skel = test_libbpf_get_fd_by_id_opts__open_and_load(); + if (!ASSERT_OK_PTR(skel, + "test_libbpf_get_fd_by_id_opts__open_and_load")) + return; + + ret = test_libbpf_get_fd_by_id_opts__attach(skel); + if (!ASSERT_OK(ret, "test_libbpf_get_fd_by_id_opts__attach")) + goto close_prog; + + ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.data_input), + &info_m, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd")) + goto close_prog; + + fd = bpf_map_get_fd_by_id(info_m.id); + if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id")) + goto close_prog; + + fd = bpf_map_get_fd_by_id_opts(info_m.id, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id_opts")) + goto close_prog; + + fd = bpf_map_get_fd_by_id_opts(info_m.id, &fd_opts_rdonly); + if (!ASSERT_GE(fd, 0, "bpf_map_get_fd_by_id_opts")) + goto close_prog; + + /* Map lookup should work with read-only fd. */ + ret = bpf_map_lookup_elem(fd, &zero, &value); + if (!ASSERT_OK(ret, "bpf_map_lookup_elem")) + goto close_prog; + + if (!ASSERT_EQ(value, 0, "map value mismatch")) + goto close_prog; + + /* Map update should not work with read-only fd. */ + ret = bpf_map_update_elem(fd, &zero, &len, BPF_ANY); + if (!ASSERT_LT(ret, 0, "bpf_map_update_elem")) + goto close_prog; + + /* Map update should work with read-write fd. */ + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.data_input), &zero, + &len, BPF_ANY); + if (!ASSERT_OK(ret, "bpf_map_update_elem")) + goto close_prog; + + /* Prog get fd with opts set should not work (no kernel support). */ + ret = bpf_prog_get_fd_by_id_opts(0, &fd_opts_rdonly); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_prog_get_fd_by_id_opts")) + goto close_prog; + + /* Link get fd with opts set should not work (no kernel support). */ + ret = bpf_link_get_fd_by_id_opts(0, &fd_opts_rdonly); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_link_get_fd_by_id_opts")) + goto close_prog; + + /* BTF get fd with opts set should not work (no kernel support). */ + ret = bpf_btf_get_fd_by_id_opts(0, &fd_opts_rdonly); + ASSERT_EQ(ret, -EINVAL, "bpf_btf_get_fd_by_id_opts"); + +close_prog: + if (fd >= 0) + close(fd); + + test_libbpf_get_fd_by_id_opts__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 93e9cddaadcf..efb8bd43653c 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -139,6 +139,14 @@ static void test_libbpf_bpf_map_type_str(void) snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); uppercase(buf); + /* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED + * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value + * (map_type). For this enum value, libbpf_bpf_map_type_str() picks + * BPF_MAP_TYPE_CGROUP_STORAGE. + */ + if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0) + continue; + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c new file mode 100644 index 000000000000..9a7d4c47af63 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -0,0 +1,740 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/btf.h> +#include <test_btf.h> +#include <linux/btf.h> +#include <test_progs.h> +#include <network_helpers.h> + +#include "linked_list.skel.h" +#include "linked_list_fail.skel.h" + +static char log_buf[1024 * 1024]; + +static struct { + const char *prog_name; + const char *err_msg; +} linked_list_fail_tests[] = { +#define TEST(test, off) \ + { #test "_missing_lock_push_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_push_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, + TEST(kptr, 32) + TEST(global, 16) + TEST(map, 0) + TEST(inner_map, 0) +#undef TEST +#define TEST(test, op) \ + { #test "_kptr_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \ + { #test "_global_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \ + { #test "_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \ + { #test "_inner_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, + TEST(kptr, push_front) + TEST(kptr, push_back) + TEST(kptr, pop_front) + TEST(kptr, pop_back) + TEST(global, push_front) + TEST(global, push_back) + TEST(global, pop_front) + TEST(global, pop_back) + TEST(map, push_front) + TEST(map, push_back) + TEST(map, pop_front) + TEST(map, pop_back) + TEST(inner_map, push_front) + TEST(inner_map, push_back) + TEST(inner_map, pop_front) + TEST(inner_map, pop_back) +#undef TEST + { "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_tp", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_perf", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" }, + { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" }, + { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" }, + { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, + { "obj_new_acq", "Unreleased reference id=" }, + { "use_after_drop", "invalid mem access 'scalar'" }, + { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" }, + { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, + { "direct_read_head", "direct access to bpf_list_head is disallowed" }, + { "direct_write_head", "direct access to bpf_list_head is disallowed" }, + { "direct_read_node", "direct access to bpf_list_node is disallowed" }, + { "direct_write_node", "direct access to bpf_list_node is disallowed" }, + { "write_after_push_front", "only read is supported" }, + { "write_after_push_back", "only read is supported" }, + { "use_after_unlock_push_front", "invalid mem access 'scalar'" }, + { "use_after_unlock_push_back", "invalid mem access 'scalar'" }, + { "double_push_front", "arg#1 expected pointer to allocated object" }, + { "double_push_back", "arg#1 expected pointer to allocated object" }, + { "no_node_value_type", "bpf_list_node not found at offset=0" }, + { "incorrect_value_type", + "operation on bpf_list_head expects arg#1 bpf_list_node at offset=0 in struct foo, " + "but arg is at offset=0 in struct bar" }, + { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_node_off1", "bpf_list_node not found at offset=1" }, + { "incorrect_node_off2", "arg#1 offset=40, but expected bpf_list_node at offset=0 in struct foo" }, + { "no_head_type", "bpf_list_head not found at offset=0" }, + { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, + { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_head_off1", "bpf_list_head not found at offset=17" }, + { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, + { "pop_front_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, + { "pop_back_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, +}; + +static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct linked_list_fail *skel; + struct bpf_program *prog; + int ret; + + skel = linked_list_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "linked_list_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = linked_list_fail__load(skel); + if (!ASSERT_ERR(ret, "linked_list_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + linked_list_fail__destroy(skel); +} + +static void clear_fields(struct bpf_map *map) +{ + char buf[24]; + int key = 0; + + memset(buf, 0xff, sizeof(buf)); + ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields"); +} + +enum { + TEST_ALL, + PUSH_POP, + PUSH_POP_MULT, + LIST_IN_LIST, +}; + +static void test_linked_list_success(int mode, bool leave_in_map) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct linked_list *skel; + int ret; + + skel = linked_list__open_and_load(); + if (!ASSERT_OK_PTR(skel, "linked_list__open_and_load")) + return; + + if (mode == LIST_IN_LIST) + goto lil; + if (mode == PUSH_POP_MULT) + goto ppm; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop), &opts); + ASSERT_OK(ret, "map_list_push_pop"); + ASSERT_OK(opts.retval, "map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); + ASSERT_OK(ret, "global_list_push_pop"); + ASSERT_OK(opts.retval, "global_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + if (mode == PUSH_POP) + goto end; + +ppm: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "global_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + if (mode == PUSH_POP_MULT) + goto end; + +lil: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_in_list), &opts); + ASSERT_OK(ret, "map_list_in_list"); + ASSERT_OK(opts.retval, "map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts); + ASSERT_OK(ret, "inner_map_list_in_list"); + ASSERT_OK(opts.retval, "inner_map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); + ASSERT_OK(ret, "global_list_in_list"); + ASSERT_OK(opts.retval, "global_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); +end: + linked_list__destroy(skel); +} + +#define SPIN_LOCK 2 +#define LIST_HEAD 3 +#define LIST_NODE 4 + +static struct btf *init_btf(void) +{ + int id, lid, hid, nid; + struct btf *btf; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "btf__new_empty")) + return NULL; + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + if (!ASSERT_EQ(id, 1, "btf__add_int")) + goto end; + lid = btf__add_struct(btf, "bpf_spin_lock", 4); + if (!ASSERT_EQ(lid, SPIN_LOCK, "btf__add_struct bpf_spin_lock")) + goto end; + hid = btf__add_struct(btf, "bpf_list_head", 16); + if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head")) + goto end; + nid = btf__add_struct(btf, "bpf_list_node", 16); + if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node")) + goto end; + return btf; +end: + btf__free(btf); + return NULL; +} + +static void test_btf(void) +{ + struct btf *btf = NULL; + int id, err; + + while (test__start_subtest("btf: too many locks")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 32, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + err = btf__add_field(btf, "c", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -E2BIG, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing lock")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 16); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 7, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: bad offset")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EEXIST, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing contains:")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing struct")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:bar", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:bar")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing node")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:foo:c", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:c")) + break; + + err = btf__load_into_kernel(btf); + btf__free(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + break; + } + + while (test__start_subtest("btf: node incorrect type")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 4); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: multiple bpf_list_node with name b")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 52); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + err = btf__add_field(btf, "d", SPIN_LOCK, 384, 0); + if (!ASSERT_OK(err, "btf__add_field foo::d")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned AA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned ABA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 16); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar:a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar:b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar:c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag")) + break; + id = btf__add_struct(btf, "baz", 36); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0); + if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a")) + break; + id = btf__add_struct(btf, "bam", 16); + if (!ASSERT_EQ(id, 11, "btf__add_struct bam")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bam::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } +} + +void test_linked_list(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(linked_list_fail_tests); i++) { + if (!test__start_subtest(linked_list_fail_tests[i].prog_name)) + continue; + test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name, + linked_list_fail_tests[i].err_msg); + } + test_btf(); + test_linked_list_success(PUSH_POP, false); + test_linked_list_success(PUSH_POP, true); + test_linked_list_success(PUSH_POP_MULT, false); + test_linked_list_success(PUSH_POP_MULT, true); + test_linked_list_success(LIST_IN_LIST, false); + test_linked_list_success(LIST_IN_LIST, true); + test_linked_list_success(TEST_ALL, false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index 1102e4f42d2d..f117bfef68a1 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -173,10 +173,12 @@ static void test_lsm_cgroup_functional(void) ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count"); - /* AF_UNIX is prohibited. */ - fd = socket(AF_UNIX, SOCK_STREAM, 0); - ASSERT_LT(fd, 0, "socket(AF_UNIX)"); + if (!(skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK)) + /* AF_UNIX is prohibited. */ + ASSERT_LT(fd, 0, "socket(AF_UNIX)"); close(fd); /* AF_INET6 gets default policy (sk_priority). */ @@ -233,11 +235,18 @@ static void test_lsm_cgroup_functional(void) /* AF_INET6+SOCK_STREAM * AF_PACKET+SOCK_RAW + * AF_UNIX+SOCK_RAW if already have non-bpf lsms installed * listen_fd * client_fd * accepted_fd */ - ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); + if (skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK) + /* AF_UNIX+SOCK_RAW if already have non-bpf lsms installed */ + ASSERT_EQ(skel->bss->called_socket_post_create2, 6, "called_create2"); + else + ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); /* start_server * bind(ETH_P_ALL) diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index fdcea7a61491..3533a4ecad01 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -5,83 +5,6 @@ #include "map_kptr.skel.h" #include "map_kptr_fail.skel.h" -static char log_buf[1024 * 1024]; - -struct { - const char *prog_name; - const char *err_msg; -} map_kptr_fail_tests[] = { - { "size_not_bpf_dw", "kptr access size must be BPF_DW" }, - { "non_const_var_off", "kptr access cannot have variable offset" }, - { "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. kptr has to be" }, - { "misaligned_access_write", "kptr access misaligned expected=8 off=7" }, - { "misaligned_access_read", "kptr access misaligned expected=8 off=1" }, - { "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" }, - { "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" }, - { "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, - { "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" }, - { "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" }, - { "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" }, - { "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" }, - { "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" }, - { "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" }, - { "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" }, - { "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" }, - { "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, - { "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" }, - { "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" }, - { "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" }, - { "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" }, - { "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" }, - { "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" }, - { "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" }, - { "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" }, -}; - -static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, - .kernel_log_size = sizeof(log_buf), - .kernel_log_level = 1); - struct map_kptr_fail *skel; - struct bpf_program *prog; - int ret; - - skel = map_kptr_fail__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts")) - return; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) - goto end; - - bpf_program__set_autoload(prog, true); - - ret = map_kptr_fail__load(skel); - if (!ASSERT_ERR(ret, "map_kptr__load must fail")) - goto end; - - if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { - fprintf(stderr, "Expected: %s\n", err_msg); - fprintf(stderr, "Verifier: %s\n", log_buf); - } - -end: - map_kptr_fail__destroy(skel); -} - -static void test_map_kptr_fail(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) { - if (!test__start_subtest(map_kptr_fail_tests[i].prog_name)) - continue; - test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name, - map_kptr_fail_tests[i].err_msg); - } -} - static void test_map_kptr_success(bool test_run) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -105,7 +28,7 @@ static void test_map_kptr_success(bool test_run) ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); if (test_run) - return; + goto exit; ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); @@ -132,6 +55,7 @@ static void test_map_kptr_success(bool test_run) ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); +exit: map_kptr__destroy(skel); } @@ -144,5 +68,6 @@ void test_map_kptr(void) */ test_map_kptr_success(true); } - test_map_kptr_fail(); + + RUN_TESTS(map_kptr_fail); } diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d0e50dcf47c..7fc01ff490db 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -103,6 +103,13 @@ void test_module_attach(void) ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); bpf_link__destroy(link); + link = bpf_program__attach(skel->progs.kprobe_multi); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) + goto cleanup; + + ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + bpf_link__destroy(link); + cleanup: test_module_attach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c new file mode 100644 index 000000000000..447d8560ecb6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/ + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <test_progs.h> +#include <bpf/btf.h> +#include "rcu_read_lock.skel.h" +#include "cgroup_helpers.h" + +static unsigned long long cgroup_id; + +static void test_success(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.get_cgroup_id, true); + bpf_program__set_autoload(skel->progs.task_succ, true); + bpf_program__set_autoload(skel->progs.no_lock, true); + bpf_program__set_autoload(skel->progs.two_regions, true); + bpf_program__set_autoload(skel->progs.non_sleepable_1, true); + bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val"); + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); +out: + rcu_read_lock__destroy(skel); +} + +static void test_rcuptr_acquire(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.task_acquire, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + ASSERT_OK(err, "skel_attach"); +out: + rcu_read_lock__destroy(skel); +} + +static const char * const inproper_region_tests[] = { + "miss_lock", + "miss_unlock", + "non_sleepable_rcu_mismatch", + "inproper_sleepable_helper", + "inproper_sleepable_kfunc", + "nested_rcu_region", +}; + +static void test_inproper_region(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +static const char * const rcuptr_misuse_tests[] = { + "task_untrusted_non_rcuptr", + "task_untrusted_rcuptr", + "cross_rcu_region", +}; + +static void test_rcuptr_misuse(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +void test_rcu_read_lock(void) +{ + struct btf *vmlinux_btf; + int cgroup_fd; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) { + test__skip(); + goto out; + } + + cgroup_fd = test__join_cgroup("/rcu_read_lock"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock")) + goto out; + + cgroup_id = get_cgroup_id("/rcu_read_lock"); + if (test__start_subtest("success")) + test_success(); + if (test__start_subtest("rcuptr_acquire")) + test_rcuptr_acquire(); + if (test__start_subtest("negative_tests_inproper_region")) + test_inproper_region(); + if (test__start_subtest("negative_tests_rcuptr_misuse")) + test_rcuptr_misuse(); + close(cgroup_fd); +out: + btf__free(vmlinux_btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 9a80fe8a6427..ac104dc652e3 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -13,6 +13,7 @@ #include <linux/perf_event.h> #include <linux/ring_buffer.h> #include "test_ringbuf.lskel.h" +#include "test_ringbuf_map_key.lskel.h" #define EDONE 7777 @@ -58,6 +59,7 @@ static int process_sample(void *ctx, void *data, size_t len) } } +static struct test_ringbuf_map_key_lskel *skel_map_key; static struct test_ringbuf_lskel *skel; static struct ring_buffer *ringbuf; @@ -81,7 +83,7 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } -void test_ringbuf(void) +static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; @@ -297,3 +299,65 @@ cleanup: ring_buffer__free(ringbuf); test_ringbuf_lskel__destroy(skel); } + +static int process_map_key_sample(void *ctx, void *data, size_t len) +{ + struct sample *s; + int err, val; + + s = data; + switch (s->seq) { + case 1: + ASSERT_EQ(s->value, 42, "sample_value"); + err = bpf_map_lookup_elem(skel_map_key->maps.hash_map.map_fd, + s, &val); + ASSERT_OK(err, "hash_map bpf_map_lookup_elem"); + ASSERT_EQ(val, 1, "hash_map val"); + return -EDONE; + default: + return 0; + } +} + +static void ringbuf_map_key_subtest(void) +{ + int err; + + skel_map_key = test_ringbuf_map_key_lskel__open(); + if (!ASSERT_OK_PTR(skel_map_key, "test_ringbuf_map_key_lskel__open")) + return; + + skel_map_key->maps.ringbuf.max_entries = getpagesize(); + skel_map_key->bss->pid = getpid(); + + err = test_ringbuf_map_key_lskel__load(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__load")) + goto cleanup; + + ringbuf = ring_buffer__new(skel_map_key->maps.ringbuf.map_fd, + process_map_key_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) + goto cleanup; + + err = test_ringbuf_map_key_lskel__attach(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__attach")) + goto cleanup_ringbuf; + + syscall(__NR_getpgid); + ASSERT_EQ(skel_map_key->bss->seq, 1, "skel_map_key->bss->seq"); + err = ring_buffer__poll(ringbuf, -1); + ASSERT_EQ(err, -EDONE, "ring_buffer__poll"); + +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_map_key_lskel__destroy(skel_map_key); +} + +void test_ringbuf(void) +{ + if (test__start_subtest("ringbuf")) + ringbuf_subtest(); + if (test__start_subtest("ringbuf_map_key")) + ringbuf_map_key_subtest(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 99dac5292b41..bc6817aee9aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include <sys/mman.h> struct s { int a; @@ -22,7 +23,8 @@ void test_skeleton(void) struct test_skeleton__kconfig *kcfg; const void *elf_bytes; size_t elf_bytes_sz = 0; - int i; + void *m; + int i, fd; skel = test_skeleton__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -124,6 +126,13 @@ void test_skeleton(void) ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + fd = bpf_map__fd(skel->maps.data_non_mmapable); + m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(m, MAP_FAILED, "unexpected_mmap_success")) + munmap(m, getpagesize()); + + ASSERT_EQ(bpf_map__map_flags(skel->maps.data_non_mmapable), 0, "non_mmap_flags"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c new file mode 100644 index 000000000000..d9270bd3d920 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +#include "test_spin_lock.skel.h" +#include "test_spin_lock_fail.skel.h" + +static char log_buf[1024 * 1024]; + +static struct { + const char *prog_name; + const char *err_msg; +} spin_lock_fail_tests[] = { + { "lock_id_kptr_preserve", + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " + "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=ptr_ expected=percpu_ptr_" }, + { "lock_id_global_zero", + "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_mapval_preserve", + "8: (bf) r1 = r0 ; R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0) " + "R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n9: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_innermapval_preserve", + "13: (bf) r1 = r0 ; R0=map_value(id=2,off=0,ks=4,vs=8,imm=0) " + "R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n14: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval1", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval2", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, +}; + +static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_spin_lock_fail *skel; + struct bpf_program *prog; + int ret; + + skel = test_spin_lock_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_spin_lock_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_spin_lock_fail__load(skel); + if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail")) + goto end; + + /* Skip check if JIT does not support kfuncs */ + if (strstr(log_buf, "JIT does not support calling kernel function")) { + test__skip(); + goto end; + } + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_spin_lock_fail__destroy(skel); +} + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + pthread_exit(arg); +} + +void test_spin_lock_success(void) +{ + struct test_spin_lock *skel; + pthread_t thread_id[4]; + int prog_fd, i; + void *ret; + + skel = test_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_spin_lock__open_and_load")) + return; + prog_fd = bpf_program__fd(skel->progs.bpf_spin_lock_test); + for (i = 0; i < 4; i++) { + int err; + + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end; + } + + for (i = 0; i < 4; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end; + } +end: + test_spin_lock__destroy(skel); +} + +void test_spin_lock(void) +{ + int i; + + test_spin_lock_success(); + + for (i = 0; i < ARRAY_SIZE(spin_lock_fail_tests); i++) { + if (!test__start_subtest(spin_lock_fail_tests[i].prog_name)) + continue; + test_spin_lock_fail_prog(spin_lock_fail_tests[i].prog_name, + spin_lock_fail_tests[i].err_msg); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c deleted file mode 100644 index 15eb1372d771..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> -#include <network_helpers.h> - -static void *spin_lock_thread(void *arg) -{ - int err, prog_fd = *(u32 *) arg; - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), - .repeat = 10000, - ); - - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); - pthread_exit(arg); -} - -void test_spinlock(void) -{ - const char *file = "./test_spin_lock.bpf.o"; - pthread_t thread_id[4]; - struct bpf_object *obj = NULL; - int prog_fd; - int err = 0, i; - void *ret; - - err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (CHECK_FAIL(err)) { - printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno); - goto close_prog; - } - for (i = 0; i < 4; i++) - if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd))) - goto close_prog; - - for (i = 0; i < 4; i++) - if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || - ret != (void *)&prog_fd)) - goto close_prog; -close_prog: - bpf_object__close(obj); -} diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c new file mode 100644 index 000000000000..18848c31e36f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <sys/wait.h> +#include <test_progs.h> +#include <unistd.h> + +#include "task_kfunc_failure.skel.h" +#include "task_kfunc_success.skel.h" + +static size_t log_buf_sz = 1 << 20; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct task_kfunc_success *open_load_task_kfunc_skel(void) +{ + struct task_kfunc_success *skel; + int err; + + skel = task_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = task_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + task_kfunc_success__destroy(skel); + return NULL; +} + +static void run_success_test(const char *prog_name) +{ + struct task_kfunc_success *skel; + int status; + pid_t child_pid; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_task_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_spawn_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GT(child_pid, -1, "child_pid")) + goto cleanup; + if (child_pid == 0) + _exit(0); + waitpid(child_pid, &status, 0); + + ASSERT_OK(skel->bss->err, "post_wait_err"); + +cleanup: + bpf_link__destroy(link); + task_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_task_acquire_release_argument", + "test_task_acquire_release_current", + "test_task_acquire_leave_in_map", + "test_task_xchg_release", + "test_task_get_release", + "test_task_current_acquire_release", + "test_task_from_pid_arg", + "test_task_from_pid_current", + "test_task_from_pid_invalid", +}; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} failure_tests[] = { + {"task_kfunc_acquire_untrusted", "R1 must be referenced or trusted"}, + {"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"}, + {"task_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"}, + {"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_acquire_unreleased", "Unreleased reference"}, + {"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"}, + {"task_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"}, + {"task_kfunc_get_null", "arg#0 expected pointer to map value"}, + {"task_kfunc_xchg_unreleased", "Unreleased reference"}, + {"task_kfunc_get_unreleased", "Unreleased reference"}, + {"task_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"}, + {"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_from_lsm_task_free", "reg type unsupported for arg#0 function"}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct task_kfunc_failure *skel; + int err, i; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = task_kfunc_failure__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "task_kfunc_failure__open_opts")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + struct bpf_program *prog; + const char *curr_name = failure_tests[i].prog_name; + + prog = bpf_object__find_program_by_name(skel->obj, curr_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name)); + } + + err = task_kfunc_failure__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + task_kfunc_failure__destroy(skel); +} + +void test_task_kfunc(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + if (!test__start_subtest(failure_tests[i].prog_name)) + continue; + + verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 035c263aab1b..a176bd75a748 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -3,12 +3,16 @@ #define _GNU_SOURCE /* See feature_test_macros(7) */ #include <unistd.h> +#include <sched.h> +#include <pthread.h> #include <sys/syscall.h> /* For SYS_xxx definitions */ #include <sys/types.h> #include <test_progs.h> +#include "task_local_storage_helpers.h" #include "task_local_storage.skel.h" #include "task_local_storage_exit_creds.skel.h" #include "task_ls_recursion.skel.h" +#include "task_storage_nodeadlock.skel.h" static void test_sys_enter_exit(void) { @@ -39,7 +43,8 @@ out: static void test_exit_creds(void) { struct task_local_storage_exit_creds *skel; - int err; + int err, run_count, sync_rcu_calls = 0; + const int MAX_SYNC_RCU_CALLS = 1000; skel = task_local_storage_exit_creds__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) @@ -53,8 +58,19 @@ static void test_exit_creds(void) if (CHECK_FAIL(system("ls > /dev/null"))) goto out; - /* sync rcu to make sure exit_creds() is called for "ls" */ - kern_sync_rcu(); + /* kern_sync_rcu is not enough on its own as the read section we want + * to wait for may start after we enter synchronize_rcu, so our call + * won't wait for the section to finish. Loop on the run counter + * as well to ensure the program has run. + */ + do { + kern_sync_rcu(); + run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST); + } while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS); + + ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS, + "sync_rcu count too high"); + ASSERT_NEQ(run_count, 0, "run_count"); ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count"); ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count"); out: @@ -63,24 +79,160 @@ out: static void test_recursion(void) { + int err, map_fd, prog_fd, task_fd; struct task_ls_recursion *skel; - int err; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + long value; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open")) + return; skel = task_ls_recursion__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) - return; + goto out; err = task_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ + skel->bss->test_pid = getpid(); syscall(SYS_gettid); + skel->bss->test_pid = 0; + task_ls_recursion__detach(skel); + + /* Refer to the comment in BPF_PROG(on_update) for + * the explanation on the value 201 and 100. + */ + map_fd = bpf_map__fd(skel->maps.map_a); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_a"); + ASSERT_EQ(value, 201, "map_a value"); + ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy"); + + map_fd = bpf_map__fd(skel->maps.map_b); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_b"); + ASSERT_EQ(value, 100, "map_b value"); + + prog_fd = bpf_program__fd(skel->progs.on_lookup); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_update); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_enter); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion"); out: + close(task_fd); task_ls_recursion__destroy(skel); } +static bool stop; + +static void waitall(const pthread_t *tids, int nr) +{ + int i; + + stop = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +static void *sock_create_loop(void *arg) +{ + struct task_storage_nodeadlock *skel = arg; + int fd; + + while (!stop) { + fd = socket(AF_INET, SOCK_STREAM, 0); + close(fd); + if (skel->bss->nr_get_errs || skel->bss->nr_del_errs) + stop = true; + } + + return NULL; +} + +static void test_nodeadlock(void) +{ + struct task_storage_nodeadlock *skel; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + const int nr_threads = 32; + pthread_t tids[nr_threads]; + int i, prog_fd, err; + cpu_set_t old, new; + + /* Pin all threads to one cpu to increase the chance of preemption + * in a sleepable bpf prog. + */ + CPU_ZERO(&new); + CPU_SET(0, &new); + err = sched_getaffinity(getpid(), sizeof(old), &old); + if (!ASSERT_OK(err, "getaffinity")) + return; + err = sched_setaffinity(getpid(), sizeof(new), &new); + if (!ASSERT_OK(err, "setaffinity")) + return; + + skel = task_storage_nodeadlock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto done; + + /* Unnecessary recursion and deadlock detection are reproducible + * in the preemptible kernel. + */ + if (!skel->kconfig->CONFIG_PREEMPT) { + test__skip(); + goto done; + } + + err = task_storage_nodeadlock__attach(skel); + ASSERT_OK(err, "attach prog"); + + for (i = 0; i < nr_threads; i++) { + err = pthread_create(&tids[i], NULL, sock_create_loop, skel); + if (err) { + /* Only assert once here to avoid excessive + * PASS printing during test failure. + */ + ASSERT_OK(err, "pthread_create"); + waitall(tids, i); + goto done; + } + } + + /* With 32 threads, 1s is enough to reproduce the issue */ + sleep(1); + waitall(tids, nr_threads); + + info_len = sizeof(info); + prog_fd = bpf_program__fd(skel->progs.socket_post_create); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "prog recursion"); + + ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy"); + ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); + +done: + task_storage_nodeadlock__destroy(skel); + sched_setaffinity(getpid(), sizeof(old), &old); +} + void test_task_local_storage(void) { if (test__start_subtest("sys_enter_exit")) @@ -89,4 +241,6 @@ void test_task_local_storage(void) test_exit_creds(); if (test__start_subtest("recursion")) test_recursion(); + if (test__start_subtest("nodeadlock")) + test_nodeadlock(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index cb6a53b3e023..bca5e6839ac4 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -11,12 +11,12 @@ */ #include <arpa/inet.h> -#include <linux/if.h> #include <linux/if_tun.h> #include <linux/limits.h> #include <linux/sysctl.h> #include <linux/time_types.h> #include <linux/net_tstamp.h> +#include <net/if.h> #include <stdbool.h> #include <stdio.h> #include <sys/stat.h> @@ -59,10 +59,6 @@ #define IFADDR_STR_LEN 18 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" -#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" -#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" -#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" - #define TIMEOUT_MILLIS 10000 #define NSEC_PER_SEC 1000000000ULL @@ -115,7 +111,9 @@ static void netns_setup_namespaces_nofail(const char *verb) } struct netns_setup_result { + int ifindex_veth_src; int ifindex_veth_src_fwd; + int ifindex_veth_dst; int ifindex_veth_dst_fwd; }; @@ -139,27 +137,6 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } -static int get_ifindex(const char *name) -{ - char path[PATH_MAX]; - char buf[32]; - FILE *f; - int ret; - - snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name); - f = fopen(path, "r"); - if (!ASSERT_OK_PTR(f, path)) - return -1; - - ret = fread(buf, 1, sizeof(buf), f); - if (!ASSERT_GT(ret, 0, "fread ifindex")) { - fclose(f); - return -1; - } - fclose(f); - return atoi(buf); -} - #define SYS(fmt, ...) \ ({ \ char cmd[1024]; \ @@ -182,11 +159,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) goto fail; - result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); - if (result->ifindex_veth_src_fwd < 0) + result->ifindex_veth_src = if_nametoindex("veth_src"); + if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src")) + goto fail; + + result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd"); + if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd")) goto fail; - result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd"); - if (result->ifindex_veth_dst_fwd < 0) + + result->ifindex_veth_dst = if_nametoindex("veth_dst"); + if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst")) + goto fail; + + result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd"); + if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) goto fail; SYS("ip link set veth_src netns " NS_SRC); @@ -260,19 +246,78 @@ fail: return -1; } -static int netns_load_bpf(void) +static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex) +{ + char err_str[128], ifname[16]; + int err; + + qdisc_hook->ifindex = ifindex; + qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + err = bpf_tc_hook_create(qdisc_hook); + snprintf(err_str, sizeof(err_str), + "qdisc add dev %s clsact", + if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>"); + err_str[sizeof(err_str) - 1] = 0; + ASSERT_OK(err, err_str); + + return err; +} + +static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook, + enum bpf_tc_attach_point xgress, + const struct bpf_program *prog, int priority) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_attach); + char err_str[128], ifname[16]; + int err; + + qdisc_hook->attach_point = xgress; + tc_attach.prog_fd = bpf_program__fd(prog); + tc_attach.priority = priority; + err = bpf_tc_attach(qdisc_hook, &tc_attach); + snprintf(err_str, sizeof(err_str), + "filter add dev %s %s prio %d bpf da %s", + if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>", + xgress == BPF_TC_INGRESS ? "ingress" : "egress", + priority, bpf_program__name(prog)); + err_str[sizeof(err_str) - 1] = 0; + ASSERT_OK(err, err_str); + + return err; +} + +#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \ + if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \ + goto fail; \ +}) + +#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \ + if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \ + goto fail; \ +}) + +static int netns_load_bpf(const struct bpf_program *src_prog, + const struct bpf_program *dst_prog, + const struct bpf_program *chk_prog, + const struct netns_setup_result *setup_result) { - SYS("tc qdisc add dev veth_src_fwd clsact"); - SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " - SRC_PROG_PIN_FILE); - SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); - - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " - DST_PROG_PIN_FILE); - SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + int err; + + /* tc qdisc add dev veth_src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); + /* tc filter add dev veth_src_fwd ingress bpf da src_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); + /* tc filter add dev veth_src_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); + + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); + /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); return 0; fail: @@ -499,78 +544,79 @@ done: close(client_fd); } -static int netns_load_dtime_bpf(struct test_tc_dtime *skel) +static int netns_load_dtime_bpf(struct test_tc_dtime *skel, + const struct netns_setup_result *setup_result) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); struct nstoken *nstoken; - -#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file -#define PIN(__prog) ({ \ - int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \ - if (!ASSERT_OK(err, "pin " #__prog)) \ - goto fail; \ - }) + int err; /* setup ns_src tc progs */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return -1; - PIN(egress_host); - PIN(ingress_host); - SYS("tc qdisc add dev veth_src clsact"); - SYS("tc filter add dev veth_src ingress bpf da object-pinned " - PIN_FNAME(ingress_host)); - SYS("tc filter add dev veth_src egress bpf da object-pinned " - PIN_FNAME(egress_host)); + /* tc qdisc add dev veth_src clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); + /* tc filter add dev veth_src ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev veth_src egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_dst tc progs */ nstoken = open_netns(NS_DST); if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) return -1; - PIN(egress_host); - PIN(ingress_host); - SYS("tc qdisc add dev veth_dst clsact"); - SYS("tc filter add dev veth_dst ingress bpf da object-pinned " - PIN_FNAME(ingress_host)); - SYS("tc filter add dev veth_dst egress bpf da object-pinned " - PIN_FNAME(egress_host)); + /* tc qdisc add dev veth_dst clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); + /* tc filter add dev veth_dst ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev veth_dst egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_fwd tc progs */ nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) return -1; - PIN(ingress_fwdns_prio100); - PIN(egress_fwdns_prio100); - PIN(ingress_fwdns_prio101); - PIN(egress_fwdns_prio101); - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio100)); - SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio101)); - SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio100)); - SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio101)); - SYS("tc qdisc add dev veth_src_fwd clsact"); - SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio100)); - SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio101)); - SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio100)); - SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio101)); + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio100, 100); + /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio101, 101); + /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio100, 100); + /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio101, 101); + + /* tc qdisc add dev veth_src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); + /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio100, 100); + /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio101, 101); + /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio100, 100); + /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio101, 101); close_netns(nstoken); - -#undef PIN - return 0; fail: close_netns(nstoken); - return -1; + return err; } enum { @@ -746,7 +792,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_dtime__load")) goto done; - if (netns_load_dtime_bpf(skel)) + if (netns_load_dtime_bpf(skel, setup_result)) goto done; nstoken = open_netns(NS_FWD); @@ -788,7 +834,6 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) { struct nstoken *nstoken = NULL; struct test_tc_neigh_fib *skel = NULL; - int err; nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns fwd")) @@ -801,19 +846,8 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; /* bpf_fib_lookup() checks if forwarding is enabled */ @@ -849,19 +883,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_neigh__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) @@ -896,19 +919,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_peer__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) @@ -991,6 +1003,8 @@ static int tun_relay_loop(int src_fd, int target_fd) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); struct test_tc_peer *skel = NULL; struct nstoken *nstoken = NULL; int err; @@ -1034,8 +1048,8 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) goto fail; - ifindex = get_ifindex("tun_fwd"); - if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd")) + ifindex = if_nametoindex("tun_fwd"); + if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd")) goto fail; skel->rodata->IFINDEX_SRC = ifindex; @@ -1045,31 +1059,21 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_peer__load")) goto fail; - err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto fail; - - err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto fail; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto fail; - /* Load "tc_src_l3" to the tun_fwd interface to redirect packets * towards dst, and "tc_dst" to redirect packets * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. */ - SYS("tc qdisc add dev tun_fwd clsact"); - SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " - SRC_PROG_PIN_FILE); - - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " - DST_PROG_PIN_FILE); - SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); + /* tc qdisc add dev tun_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); + /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ + XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); + + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); + /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); @@ -1134,7 +1138,7 @@ static void *test_tc_redirect_run_tests(void *arg) return NULL; } -void serial_test_tc_redirect(void) +void test_tc_redirect(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 617bbce6ef8f..5cf85d0f9827 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -485,7 +485,7 @@ static void misc(void) goto check_linum; ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); - if (ASSERT_EQ(ret, sizeof(send_msg), "read(msg)")) + if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)")) goto check_linum; } @@ -505,6 +505,8 @@ static void misc(void) ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin"); + ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); + check_linum: ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds); @@ -539,7 +541,7 @@ void test_tcp_hdr_options(void) goto skel_destroy; cg_fd = test__join_cgroup(CG_NAME); - if (ASSERT_GE(cg_fd, 0, "join_cgroup")) + if (!ASSERT_GE(cg_fd, 0, "join_cgroup")) goto skel_destroy; for (i = 0; i < ARRAY_SIZE(tests); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index eea274110267..07ad457f3370 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -421,7 +421,7 @@ static void *test_tunnel_run_tests(void *arg) return NULL; } -void serial_test_tunnel(void) +void test_tunnel(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index d5022b91d1e4..48dc9472e160 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -15,7 +15,7 @@ static void test_fentry(void) err = tracing_struct__attach(skel); if (!ASSERT_OK(err, "tracing_struct__attach")) - return; + goto destroy_skel; ASSERT_OK(trigger_module_test_read(256), "trigger_read"); @@ -54,6 +54,7 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t5_ret, 1, "t5 ret"); tracing_struct__detach(skel); +destroy_skel: tracing_struct__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/type_cast.c b/tools/testing/selftests/bpf/prog_tests/type_cast.c new file mode 100644 index 000000000000..9317d5fa2635 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/type_cast.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> +#include "type_cast.skel.h" + +static void test_xdp(void) +{ + struct type_cast *skel; + int err, prog_fd; + char buf[128]; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_xdp, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_xdp); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval"); + + ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex"); + ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex"); + ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name"); + ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum"); + +out: + type_cast__destroy(skel); +} + +static void test_tc(void) +{ + struct type_cast *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_skb, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_skb); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "tc test_run retval"); + + ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len"); + ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len"); + ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len"); + ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len"); + ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare"); + +out: + type_cast__destroy(skel); +} + +static const char * const negative_tests[] = { + "untrusted_ptr", + "kctx_u64", +}; + +static void test_negative(void) +{ + struct bpf_program *prog; + struct type_cast *skel; + int i, err; + + for (i = 0; i < ARRAY_SIZE(negative_tests); i++) { + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = type_cast__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + type_cast__destroy(skel); + } +} + +void test_type_cast(void) +{ + if (test__start_subtest("xdp")) + test_xdp(); + if (test__start_subtest("tc")) + test_tc(); + if (test__start_subtest("negative")) + test_negative(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index 02b18d018b36..dae68de285b9 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -673,9 +673,11 @@ static struct { {"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"}, {"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"}, {"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"}, - {"user_ringbuf_callback_discard_dynptr", "arg 1 is an unacquired reference"}, - {"user_ringbuf_callback_submit_dynptr", "arg 1 is an unacquired reference"}, + {"user_ringbuf_callback_discard_dynptr", "cannot release unowned const bpf_dynptr"}, + {"user_ringbuf_callback_submit_dynptr", "cannot release unowned const bpf_dynptr"}, {"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"}, + {"user_ringbuf_callback_reinit_dynptr_mem", "Dynptr has to be an uninitialized dynptr"}, + {"user_ringbuf_callback_reinit_dynptr_ringbuf", "Dynptr has to be an uninitialized dynptr"}, }; #define SUCCESS_TEST(_func) { _func, #_func } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 9b9cf8458adf..39973ea1ce43 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -18,7 +18,7 @@ static void test_xdp_adjust_tail_shrink(void) ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) + if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) return; err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -53,7 +53,7 @@ static void test_xdp_adjust_tail_grow(void) ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) + if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -63,6 +63,7 @@ static void test_xdp_adjust_tail_grow(void) expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ topts.data_in = &pkt_v6; topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv6"); ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval"); @@ -89,7 +90,7 @@ static void test_xdp_adjust_tail_grow2(void) ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) + if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; /* Test case-64 */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index 75550a40e029..c72083885b6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -94,12 +94,12 @@ static void test_synproxy(bool xdp) SYS("sysctl -w net.ipv4.tcp_syncookies=2"); SYS("sysctl -w net.ipv4.tcp_timestamps=1"); SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); - SYS("iptables -t raw -I PREROUTING \ + SYS("iptables-legacy -t raw -I PREROUTING \ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); - SYS("iptables -t filter -A INPUT \ + SYS("iptables-legacy -t filter -A INPUT \ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); - SYS("iptables -t filter -A INPUT \ + SYS("iptables-legacy -t filter -A INPUT \ -i tmp1 -m state --state INVALID -j DROP"); ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c new file mode 100644 index 000000000000..8b03c9bb4862 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Topology: + * --------- + * NS0 namespace | NS1 namespace | NS2 namespace + * | | + * +---------------+ | +---------------+ | + * | ipsec0 |---------| ipsec0 | | + * | 192.168.1.100 | | | 192.168.1.200 | | + * | if_id: bpf | | +---------------+ | + * +---------------+ | | + * | | | +---------------+ + * | | | | ipsec0 | + * \------------------------------------------| 192.168.1.200 | + * | | +---------------+ + * | | + * | | (overlay network) + * ------------------------------------------------------ + * | | (underlay network) + * +--------------+ | +--------------+ | + * | veth01 |----------| veth10 | | + * | 172.16.1.100 | | | 172.16.1.200 | | + * ---------------+ | +--------------+ | + * | | + * +--------------+ | | +--------------+ + * | veth02 |-----------------------------------| veth20 | + * | 172.16.2.100 | | | | 172.16.2.200 | + * +--------------+ | | +--------------+ + * + * + * Test Packet flow + * ----------- + * The tests perform 'ping 192.168.1.200' from the NS0 namespace: + * 1) request is routed to NS0 ipsec0 + * 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based + * on the requested value. This makes the ipsec0 device in external mode + * select the destination tunnel + * 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was + * used) and response is sent + * 4) response is received on NS0 ipsec0, tc ingress program is triggered and + * records the response if_id + * 5) requested if_id is compared with received if_id + */ + +#include <net/if.h> +#include <linux/rtnetlink.h> +#include <linux/if_link.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "xfrm_info.skel.h" + +#define NS0 "xfrm_test_ns0" +#define NS1 "xfrm_test_ns1" +#define NS2 "xfrm_test_ns2" + +#define IF_ID_0_TO_1 1 +#define IF_ID_0_TO_2 2 +#define IF_ID_1 3 +#define IF_ID_2 4 + +#define IP4_ADDR_VETH01 "172.16.1.100" +#define IP4_ADDR_VETH10 "172.16.1.200" +#define IP4_ADDR_VETH02 "172.16.2.100" +#define IP4_ADDR_VETH20 "172.16.2.200" + +#define ESP_DUMMY_PARAMS \ + "proto esp aead 'rfc4106(gcm(aes))' " \ + "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel " + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + +static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +{ + LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, + .prog_fd = igr_fd); + LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1, + .prog_fd = egr_fd); + int ret; + + ret = bpf_tc_hook_create(hook); + if (!ASSERT_OK(ret, "create tc hook")) + return ret; + + if (igr_fd >= 0) { + hook->attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(hook, &opts1); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + if (egr_fd >= 0) { + hook->attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(hook, &opts2); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + return 0; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0); + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1); + SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2); +} + +static int config_underlay(void) +{ + SYS("ip netns add " NS0); + SYS("ip netns add " NS1); + SYS("ip netns add " NS2); + + /* NS0 <-> NS1 [veth01 <-> veth10] */ + SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS("ip -net " NS0 " link set dev veth01 up"); + SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS("ip -net " NS1 " link set dev veth10 up"); + + /* NS0 <-> NS2 [veth02 <-> veth20] */ + SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS("ip -net " NS0 " link set dev veth02 up"); + SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS("ip -net " NS2 " link set dev veth20 up"); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local, + const char *ipv4_remote, int if_id) +{ + /* State: local -> remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id); + + /* State: local <- remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id); + + /* Policy: local -> remote */ + SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_local, ipv4_remote, if_id); + + /* Policy: local <- remote */ + SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_remote, ipv4_local, if_id); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b, + const char *ipv4_a, const char *ipv4_b, + int if_id_a, int if_id_b) +{ + return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) || + setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b); +} + +static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type, + unsigned short len) +{ + struct rtattr *rta = + (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len)); + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); + return rta; +} + +static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type, + const char *s) +{ + struct rtattr *rta = rtattr_add(nh, type, strlen(s)); + + memcpy(RTA_DATA(rta), s, strlen(s)); + return rta; +} + +static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type) +{ + return rtattr_add(nh, type, 0); +} + +static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + uint8_t *end = (uint8_t *)nh + nh->nlmsg_len; + + attr->rta_len = end - (uint8_t *)attr; +} + +static int setup_xfrmi_external_dev(const char *ns) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + unsigned char data[128]; + } req; + struct rtattr *link_info, *info_data; + struct nstoken *nstoken; + int ret = -1, sock = -1; + struct nlmsghdr *nh; + + memset(&req, 0, sizeof(req)); + nh = &req.nh; + nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + nh->nlmsg_type = RTM_NEWLINK; + nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST; + + rtattr_add_str(nh, IFLA_IFNAME, "ipsec0"); + link_info = rtattr_begin(nh, IFLA_LINKINFO); + rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm"); + info_data = rtattr_begin(nh, IFLA_INFO_DATA); + rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0); + rtattr_end(nh, info_data); + rtattr_end(nh, link_info); + + nstoken = open_netns(ns); + if (!ASSERT_OK_PTR(nstoken, "setns")) + goto done; + + sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); + if (!ASSERT_GE(sock, 0, "netlink socket")) + goto done; + ret = send(sock, nh, nh->nlmsg_len, 0); + if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length")) + goto done; + + ret = 0; +done: + if (sock != -1) + close(sock); + if (nstoken) + close_netns(nstoken); + return ret; +} + +static int config_overlay(void) +{ + if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10, + IF_ID_0_TO_1, IF_ID_1)) + goto fail; + if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20, + IF_ID_0_TO_2, IF_ID_2)) + goto fail; + + /* Older iproute2 doesn't support this option */ + if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi")) + goto fail; + + SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); + SYS("ip -net " NS0 " link set dev ipsec0 up"); + + SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); + SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS1 " link set dev ipsec0 up"); + + SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); + SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS2 " link set dev ipsec0 up"); + + return 0; +fail: + return -1; +} + +static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) +{ + skel->bss->req_if_id = if_id; + + SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); + + if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id")) + goto fail; + + return 0; +fail: + return -1; +} + +static void _test_xfrm_info(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd; + struct nstoken *nstoken = NULL; + struct xfrm_info *skel; + int ifindex; + + /* load and attach bpf progs to ipsec dev tc hook point */ + skel = xfrm_info__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load")) + goto done; + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) + goto done; + ifindex = if_nametoindex("ipsec0"); + if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info); + get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info); + if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd, + set_xfrm_info_prog_fd)) + goto done; + + /* perform test */ + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1)) + goto done; + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2)) + goto done; + +done: + if (nstoken) + close_netns(nstoken); + xfrm_info__destroy(skel); +} + +void test_xfrm_info(void) +{ + cleanup(); + + if (!ASSERT_OK(config_underlay(), "config_underlay")) + goto done; + if (!ASSERT_OK(config_overlay(), "config_overlay")) + goto done; + + if (test__start_subtest("xfrm_info")) + _test_xfrm_info(); + +done: + cleanup(); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index 6286023fd62b..c5969ca6f26b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -19,13 +19,20 @@ struct { __type(value, __u64); } arraymap1 SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10); + __type(key, __u64); + __type(value, __u32); +} hashmap1 SEC(".maps"); + __u32 key_sum = 0; __u64 val_sum = 0; SEC("iter/bpf_map_elem") int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) { - __u32 *key = ctx->key; + __u32 *hmap_val, *key = ctx->key; __u64 *val = ctx->value; if (key == (void *)0 || val == (void *)0) @@ -35,6 +42,18 @@ int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); key_sum += *key; val_sum += *val; + + /* workaround - It's necessary to do this convoluted (val, key) + * write into hashmap1, instead of simply doing + * bpf_map_update_elem(&hashmap1, val, key, BPF_ANY); + * because key has MEM_RDONLY flag and bpf_map_update elem expects + * types without this flag + */ + bpf_map_update_elem(&hashmap1, val, val, BPF_ANY); + hmap_val = bpf_map_lookup_elem(&hashmap1, val); + if (hmap_val) + *hmap_val = *key; + *val = *key; return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 285c008cbf9c..9ba14c37bbcc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -7,14 +7,14 @@ char _license[] SEC("license") = "GPL"; unsigned long last_sym_value = 0; -static inline char tolower(char c) +static inline char to_lower(char c) { if (c >= 'A' && c <= 'Z') c += ('a' - 'A'); return c; } -static inline char toupper(char c) +static inline char to_upper(char c) { if (c >= 'a' && c <= 'z') c -= ('a' - 'A'); @@ -54,7 +54,7 @@ int dump_ksym(struct bpf_iter__ksym *ctx) type = iter->type; if (iter->module_name[0]) { - type = iter->exported ? toupper(type) : tolower(type); + type = iter->exported ? to_upper(type) : to_lower(type); BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ", value, type, iter->name, iter->module_name); } else { diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 5bb11fe595a4..4a01ea9113bf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -2,6 +2,11 @@ #ifndef __BPF_MISC_H__ #define __BPF_MISC_H__ +#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) +#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) +#define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) +#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) + #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__x64_" diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index adb087aecc9e..b394817126cf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -25,6 +25,9 @@ #define IPV6_TCLASS 67 #define IPV6_AUTOFLOWLABEL 70 +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_SHOT 2 + #define SOL_TCP 6 #define TCP_NODELAY 1 #define TCP_MAXSEG 2 diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index f2661c8d2d90..7cb522d22a66 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -102,12 +102,21 @@ struct zone { struct zone_padding __pad__; }; +/* ----- START-EXPECTED-OUTPUT ----- */ +struct padding_wo_named_members { + long: 64; + long: 64; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + int f(struct { struct padded_implicitly _1; struct padded_explicitly _2; struct padded_a_lot _3; struct padded_cache_line _4; struct zone _5; + struct padding_wo_named_members _6; } *_) { return 0; diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c index 8feddb8289cf..38f78d9345de 100644 --- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c +++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c @@ -64,3 +64,4 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path) return 0; } +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h new file mode 100644 index 000000000000..7d30855bfe78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _CGRP_KFUNC_COMMON_H +#define _CGRP_KFUNC_COMMON_H + +#include <errno.h> +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct __cgrps_kfunc_map_value { + struct cgroup __kptr_ref * cgrp; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __cgrps_kfunc_map_value); + __uint(max_entries, 1); +} __cgrps_kfunc_map SEC(".maps"); + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; + +static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) +{ + s32 id; + long status; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); +} + +static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return status; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) { + bpf_map_delete_elem(&__cgrps_kfunc_map, &id); + return -ENOENT; + } + + acquired = bpf_cgroup_acquire(cgrp); + old = bpf_kptr_xchg(&v->cgrp, acquired); + if (old) { + bpf_cgroup_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _CGRP_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c new file mode 100644 index 000000000000..a1369b5ebcf8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp) +{ + int status; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + return NULL; + + return cgrps_kfunc_map_value_lookup(cgrp); +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */ + acquired = bpf_cgroup_acquire(v->cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path; + + /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */ + acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("kretprobe/cgroup_destroy_locked") +int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) +{ + struct cgroup *acquired; + + /* Can't acquire an untrusted struct cgroup * pointer. */ + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */ + acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */ + acquired = bpf_cgroup_acquire(NULL); + if (!acquired) + return 0; + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Acquired cgroup is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a non-kptr, even on a valid cgroup. */ + kptr = bpf_cgroup_kptr_get(&cgrp); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr, *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. */ + kptr = bpf_cgroup_kptr_get(&acquired); + bpf_cgroup_release(acquired); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a NULL pointer. */ + kptr = bpf_cgroup_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_release() on an untrusted pointer. */ + bpf_cgroup_release(v->cgrp); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired = (struct cgroup *)&path; + + /* Cannot release random frame pointer. */ + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return 0; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) + return -ENOENT; + + acquired = bpf_cgroup_acquire(cgrp); + + old = bpf_kptr_xchg(&v->cgrp, acquired); + + /* old cannot be passed to bpf_cgroup_release() without a NULL check. */ + bpf_cgroup_release(old); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path) +{ + /* Cannot release trusted cgroup pointer which was not acquired. */ + bpf_cgroup_release(cgrp); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c new file mode 100644 index 000000000000..0c23ea32df9f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid, invocations; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + bool same = pid == cur_pid; + + if (same) + __sync_fetch_and_add(&invocations, 1); + + return same; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + if (!is_test_kfunc_task()) + return 0; + + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *path) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path) +{ + struct cgroup *self, *ancestor1, *invalid; + + if (!is_test_kfunc_task()) + return 0; + + self = bpf_cgroup_ancestor(cgrp, cgrp->level); + if (!self) { + err = 1; + return 0; + } + + if (self->self.id != cgrp->self.id) { + bpf_cgroup_release(self); + err = 2; + return 0; + } + bpf_cgroup_release(self); + + ancestor1 = bpf_cgroup_ancestor(cgrp, cgrp->level - 1); + if (!ancestor1) { + err = 3; + return 0; + } + bpf_cgroup_release(ancestor1); + + invalid = bpf_cgroup_ancestor(cgrp, 10000); + if (invalid) { + bpf_cgroup_release(invalid); + err = 4; + return 0; + } + + invalid = bpf_cgroup_ancestor(cgrp, -1); + if (invalid) { + bpf_cgroup_release(invalid); + err = 5; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c new file mode 100644 index 000000000000..6652d18465b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +struct socket_cookie { + __u64 cookie_key; + __u64 cookie_value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct socket_cookie); +} socket_cookies SEC(".maps"); + +SEC("cgroup/connect6") +int set_cookie(struct bpf_sock_addr *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!p) + return 1; + + p->cookie_value = 0xF; + p->cookie_key = bpf_get_socket_cookie(ctx); + return 1; +} + +SEC("sockops") +int update_cookie_sockops(struct bpf_sock_ops *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 1; + + if (p->cookie_key != bpf_get_socket_cookie(ctx)) + return 1; + + p->cookie_value |= (ctx->local_port << 8); + return 1; +} + +SEC("fexit/inet_stream_connect") +int BPF_PROG(update_cookie_tracing, struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + + if (uaddr->sa_family != AF_INET6) + return 0; + + p = bpf_cgrp_storage_get(&socket_cookies, sock->sk->sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 0; + + if (p->cookie_key != bpf_get_socket_cookie(sock->sk)) + return 0; + + p->cookie_value |= 0xF0; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c new file mode 100644 index 000000000000..d41f90e2ab64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + (void)bpf_cgrp_storage_get(&map_a, (struct cgroup *)task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c new file mode 100644 index 000000000000..a043d8fefdac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +SEC("fentry/bpf_local_storage_lookup") +int BPF_PROG(on_lookup) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + return 0; +} + +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 200; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 100; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c new file mode 100644 index 000000000000..2d11ed528b6f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +__u32 target_pid; +__u64 cgroup_id; + +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + +SEC("?iter.s/cgroup") +int cgroup_iter(struct bpf_iter__cgroup *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct cgroup *cgrp = ctx->cgroup; + long *ptr; + + if (cgrp == NULL) + return 0; + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int no_rcu_lock(void *ctx) +{ + struct task_struct *task; + struct cgroup *cgrp; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* ptr_to_btf_id semantics. should work. */ + cgrp = task->cgroups->dfl_cgrp; + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int yes_rcu_lock(void *ctx) +{ + struct task_struct *task; + struct cgroup *cgrp; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + bpf_rcu_read_lock(); + cgrp = task->cgroups->dfl_cgrp; + /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */ + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c new file mode 100644 index 000000000000..9ebb8e2fe541 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +#define MAGIC_VALUE 0xabcd1234 + +pid_t target_pid = 0; +int mismatch_cnt = 0; +int enter_cnt = 0; +int exit_cnt = 0; + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + int err; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* populate value 0 */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + /* delete value 0 */ + err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + if (err) + return 0; + + /* value is not available */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + if (ptr) + return 0; + + /* re-populate the value */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + __sync_fetch_and_add(&enter_cnt, 1); + *ptr = MAGIC_VALUE + enter_cnt; + + return 0; +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + __sync_fetch_and_add(&exit_cnt, 1); + if (*ptr != MAGIC_VALUE + exit_cnt) + __sync_fetch_and_add(&mismatch_cnt, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index b0f08ff024fb..78debc1b3820 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -43,6 +43,7 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); } ringbuf SEC(".maps"); int err, val; @@ -66,6 +67,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr) * bpf_ringbuf_submit/discard_dynptr call */ SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") int ringbuf_missing_release1(void *ctx) { struct bpf_dynptr ptr; @@ -78,6 +80,7 @@ int ringbuf_missing_release1(void *ctx) } SEC("?raw_tp") +__failure __msg("Unreleased reference id=2") int ringbuf_missing_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -113,6 +116,7 @@ static int missing_release_callback_fn(__u32 index, void *data) /* Any dynptr initialized within a callback must have bpf_dynptr_put called */ SEC("?raw_tp") +__failure __msg("Unreleased reference id") int ringbuf_missing_release_callback(void *ctx) { bpf_loop(10, missing_release_callback_fn, NULL, 0); @@ -121,6 +125,7 @@ int ringbuf_missing_release_callback(void *ctx) /* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int ringbuf_release_uninit_dynptr(void *ctx) { struct bpf_dynptr ptr; @@ -133,6 +138,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) /* A dynptr can't be used after it has been invalidated */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -152,6 +158,7 @@ int use_after_invalid(void *ctx) /* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ SEC("?raw_tp") +__failure __msg("type=mem expected=ringbuf_mem") int ringbuf_invalid_api(void *ctx) { struct bpf_dynptr ptr; @@ -174,6 +181,7 @@ done: /* Can't add a dynptr to a map */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int add_dynptr_to_map1(void *ctx) { struct bpf_dynptr ptr; @@ -191,6 +199,7 @@ int add_dynptr_to_map1(void *ctx) /* Can't add a struct with an embedded dynptr to a map */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int add_dynptr_to_map2(void *ctx) { struct test_info x; @@ -208,6 +217,7 @@ int add_dynptr_to_map2(void *ctx) /* A data slice can't be accessed out of bounds */ SEC("?raw_tp") +__failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -228,6 +238,7 @@ done: } SEC("?raw_tp") +__failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) { __u32 key = 0, map_val; @@ -248,6 +259,7 @@ int data_slice_out_of_bounds_map_value(void *ctx) /* A data slice can't be used after it has been released */ SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int data_slice_use_after_release1(void *ctx) { struct bpf_dynptr ptr; @@ -279,6 +291,7 @@ done: * ptr2 is at fp - 16). */ SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int data_slice_use_after_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -310,6 +323,7 @@ done: /* A data slice must be first checked for NULL */ SEC("?raw_tp") +__failure __msg("invalid mem access 'mem_or_null'") int data_slice_missing_null_check1(void *ctx) { struct bpf_dynptr ptr; @@ -330,6 +344,7 @@ int data_slice_missing_null_check1(void *ctx) /* A data slice can't be dereferenced if it wasn't checked for null */ SEC("?raw_tp") +__failure __msg("invalid mem access 'mem_or_null'") int data_slice_missing_null_check2(void *ctx) { struct bpf_dynptr ptr; @@ -352,6 +367,7 @@ done: * dynptr argument */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int invalid_helper1(void *ctx) { struct bpf_dynptr ptr; @@ -366,6 +382,7 @@ int invalid_helper1(void *ctx) /* A dynptr can't be passed into a helper function at a non-zero offset */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") int invalid_helper2(void *ctx) { struct bpf_dynptr ptr; @@ -381,6 +398,7 @@ int invalid_helper2(void *ctx) /* A bpf_dynptr is invalidated if it's been written into */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -402,6 +420,7 @@ int invalid_write1(void *ctx) * offset */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") int invalid_write2(void *ctx) { struct bpf_dynptr ptr; @@ -425,6 +444,7 @@ int invalid_write2(void *ctx) * non-const offset */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") int invalid_write3(void *ctx) { struct bpf_dynptr ptr; @@ -456,6 +476,7 @@ static int invalid_write4_callback(__u32 index, void *data) * be invalidated as a dynptr */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int invalid_write4(void *ctx) { struct bpf_dynptr ptr; @@ -472,7 +493,9 @@ int invalid_write4(void *ctx) /* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ struct bpf_dynptr global_dynptr; + SEC("?raw_tp") +__failure __msg("type=map_value expected=fp") int global(void *ctx) { /* this should fail */ @@ -485,6 +508,7 @@ int global(void *ctx) /* A direct read should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read1(void *ctx) { struct bpf_dynptr ptr; @@ -501,6 +525,7 @@ int invalid_read1(void *ctx) /* A direct read at an offset should fail */ SEC("?raw_tp") +__failure __msg("cannot pass in dynptr at an offset") int invalid_read2(void *ctx) { struct bpf_dynptr ptr; @@ -516,6 +541,7 @@ int invalid_read2(void *ctx) /* A direct read at an offset into the lower stack slot should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read3(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -542,6 +568,7 @@ static int invalid_read4_callback(__u32 index, void *data) /* A direct read within a callback function should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read4(void *ctx) { struct bpf_dynptr ptr; @@ -557,6 +584,7 @@ int invalid_read4(void *ctx) /* Initializing a dynptr on an offset should fail */ SEC("?raw_tp") +__failure __msg("invalid write to stack") int invalid_offset(void *ctx) { struct bpf_dynptr ptr; @@ -571,6 +599,7 @@ int invalid_offset(void *ctx) /* Can't release a dynptr twice */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int release_twice(void *ctx) { struct bpf_dynptr ptr; @@ -597,6 +626,7 @@ static int release_twice_callback_fn(__u32 index, void *data) * within a calback function, fails */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int release_twice_callback(void *ctx) { struct bpf_dynptr ptr; @@ -612,6 +642,7 @@ int release_twice_callback(void *ctx) /* Reject unsupported local mem types for dynptr_from_mem API */ SEC("?raw_tp") +__failure __msg("Unsupported reg type fp for bpf_dynptr_from_mem data") int dynptr_from_mem_invalid_api(void *ctx) { struct bpf_dynptr ptr; diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index a3a6103c8569..35db7c6c1fc7 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -20,6 +20,7 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); } ringbuf SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c new file mode 100644 index 000000000000..4b0cd6753251 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/empty_skb.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +int ifindex; +int ret; + +SEC("lwt_xmit") +int redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); + return 0; +} + +SEC("lwt_xmit") +int redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} + +SEC("tc") +int tc_redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); + return 0; +} + +SEC("tc") +int tc_redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 98c3399e15c0..9e1ca8e34913 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -110,3 +110,53 @@ int test_kretprobe_manual(struct pt_regs *ctx) kprobe_multi_check(ctx, true); return 0; } + +extern const void bpf_testmod_fentry_test1 __ksym; +extern const void bpf_testmod_fentry_test2 __ksym; +extern const void bpf_testmod_fentry_test3 __ksym; + +__u64 kprobe_testmod_test1_result = 0; +__u64 kprobe_testmod_test2_result = 0; +__u64 kprobe_testmod_test3_result = 0; + +__u64 kretprobe_testmod_test1_result = 0; +__u64 kretprobe_testmod_test2_result = 0; +__u64 kretprobe_testmod_test3_result = 0; + +static void kprobe_multi_testmod_check(void *ctx, bool is_return) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + __u64 addr = bpf_get_func_ip(ctx); + + if (is_return) { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kretprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kretprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kretprobe_testmod_test3_result = 1; + } else { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kprobe_testmod_test3_result = 1; + } +} + +SEC("kprobe.multi") +int test_kprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, false); + return 0; +} + +SEC("kretprobe.multi") +int test_kretprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, true); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c new file mode 100644 index 000000000000..4ad88da5cda2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#include "linked_list.h" + +static __always_inline +int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 3; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 4; + } + + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + if (leave_in_map) + return 0; + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 6; + } + + bpf_spin_lock(lock); + f->data = 13; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 7; + f = container_of(n, struct foo, node); + if (f->data != 13) { + bpf_obj_drop(f); + return 8; + } + bpf_obj_drop(f); + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 9; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 10; + } + return 0; +} + + +static __always_inline +int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f[8], *pf; + int i; + + /* Loop following this check adds nodes 2-at-a-time in order to + * validate multiple release_on_unlock release logic + */ + if (ARRAY_SIZE(f) % 2) + return 10; + + for (i = 0; i < ARRAY_SIZE(f); i += 2) { + f[i] = bpf_obj_new(typeof(**f)); + if (!f[i]) + return 2; + f[i]->data = i; + + f[i + 1] = bpf_obj_new(typeof(**f)); + if (!f[i + 1]) { + bpf_obj_drop(f[i]); + return 9; + } + f[i + 1]->data = i + 1; + + bpf_spin_lock(lock); + bpf_list_push_front(head, &f[i]->node); + bpf_list_push_front(head, &f[i + 1]->node); + bpf_spin_unlock(lock); + } + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 3; + pf = container_of(n, struct foo, node); + if (pf->data != (ARRAY_SIZE(f) - i - 1)) { + bpf_obj_drop(pf); + return 4; + } + bpf_spin_lock(lock); + bpf_list_push_back(head, &pf->node); + bpf_spin_unlock(lock); + } + + if (leave_in_map) + return 0; + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + pf = container_of(n, struct foo, node); + if (pf->data != i) { + bpf_obj_drop(pf); + return 6; + } + bpf_obj_drop(pf); + } + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 7; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 8; + } + return 0; +} + +static __always_inline +int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct bar *ba[8], *b; + struct foo *f; + int i; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + for (i = 0; i < ARRAY_SIZE(ba); i++) { + b = bpf_obj_new(typeof(*b)); + if (!b) { + bpf_obj_drop(f); + return 3; + } + b->data = i; + bpf_spin_lock(&f->lock); + bpf_list_push_back(&f->head, &b->node); + bpf_spin_unlock(&f->lock); + } + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + + if (leave_in_map) + return 0; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 4; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 5; + } + + for (i = 0; i < ARRAY_SIZE(ba); i++) { + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (!n) { + bpf_obj_drop(f); + return 6; + } + b = container_of(n, struct bar, node); + if (b->data != i) { + bpf_obj_drop(f); + bpf_obj_drop(b); + return 7; + } + bpf_obj_drop(b); + } + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (n) { + bpf_obj_drop(f); + bpf_obj_drop(container_of(n, struct bar, node)); + return 8; + } + bpf_obj_drop(f); + return 0; +} + +static __always_inline +int test_list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop(lock, head, false); + if (ret) + return ret; + return list_push_pop(lock, head, true); +} + +static __always_inline +int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop_multiple(lock ,head, false); + if (ret) + return ret; + return list_push_pop_multiple(lock, head, true); +} + +static __always_inline +int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_in_list(lock, head, false); + if (ret) + return ret; + return list_in_list(lock, head, true); +} + +SEC("tc") +int map_list_push_pop(void *ctx) +{ + struct map_value *v; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop(void *ctx) +{ + struct map_value *v; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop(void *ctx) +{ + return test_list_push_pop(&glock, &ghead); +} + +SEC("tc") +int map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop_multiple(void *ctx) +{ + int ret; + + ret = list_push_pop_multiple(&glock, &ghead, false); + if (ret) + return ret; + return list_push_pop_multiple(&glock, &ghead, true); +} + +SEC("tc") +int map_list_in_list(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_in_list(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int global_list_in_list(void *ctx) +{ + return test_list_in_list(&glock, &ghead); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h new file mode 100644 index 000000000000..3fb2412552fc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef LINKED_LIST_H +#define LINKED_LIST_H + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +struct bar { + struct bpf_list_node node; + int data; +}; + +struct foo { + struct bpf_list_node node; + struct bpf_list_head head __contains(bar, node); + struct bpf_spin_lock lock; + int data; + struct bpf_list_node node2; +}; + +struct map_value { + struct bpf_spin_lock lock; + int data; + struct bpf_list_head head __contains(foo, node); +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +}; + +struct array_map array_map SEC(".maps"); +struct array_map inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) + +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_list_head ghead __contains(foo, node); +private(B) struct bpf_spin_lock glock2; + +#endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c new file mode 100644 index 000000000000..1d9017240e19 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +#include "linked_list.h" + +#define INIT \ + struct map_value *v, *v2, *iv, *iv2; \ + struct foo *f, *f1, *f2; \ + struct bar *b; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &(int){ 0 }); \ + if (!map) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v) \ + return 0; \ + v2 = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v2) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv) \ + return 0; \ + iv2 = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv2) \ + return 0; \ + f = bpf_obj_new(typeof(*f)); \ + if (!f) \ + return 0; \ + f1 = f; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + b = bpf_obj_new(typeof(*b)); \ + if (!b) { \ + bpf_obj_drop(f2); \ + bpf_obj_drop(f1); \ + return 0; \ + } + +#define CHECK(test, op, hexpr) \ + SEC("?tc") \ + int test##_missing_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + p(hexpr); \ + return 0; \ + } + +CHECK(kptr, push_front, &f->head); +CHECK(kptr, push_back, &f->head); +CHECK(kptr, pop_front, &f->head); +CHECK(kptr, pop_back, &f->head); + +CHECK(global, push_front, &ghead); +CHECK(global, push_back, &ghead); +CHECK(global, pop_front, &ghead); +CHECK(global, pop_back, &ghead); + +CHECK(map, push_front, &v->head); +CHECK(map, push_back, &v->head); +CHECK(map, pop_front, &v->head); +CHECK(map, pop_back, &v->head); + +CHECK(inner_map, push_front, &iv->head); +CHECK(inner_map, push_back, &iv->head); +CHECK(inner_map, pop_front, &iv->head); +CHECK(inner_map, pop_back, &iv->head); + +#undef CHECK + +#define CHECK(test, op, lexpr, hexpr) \ + SEC("?tc") \ + int test##_incorrect_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + bpf_spin_lock(lexpr); \ + p(hexpr); \ + return 0; \ + } + +#define CHECK_OP(op) \ + CHECK(kptr_kptr, op, &f1->lock, &f2->head); \ + CHECK(kptr_global, op, &f1->lock, &ghead); \ + CHECK(kptr_map, op, &f1->lock, &v->head); \ + CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \ + \ + CHECK(global_global, op, &glock2, &ghead); \ + CHECK(global_kptr, op, &glock, &f1->head); \ + CHECK(global_map, op, &glock, &v->head); \ + CHECK(global_inner_map, op, &glock, &iv->head); \ + \ + CHECK(map_map, op, &v->lock, &v2->head); \ + CHECK(map_kptr, op, &v->lock, &f2->head); \ + CHECK(map_global, op, &v->lock, &ghead); \ + CHECK(map_inner_map, op, &v->lock, &iv->head); \ + \ + CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \ + CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \ + CHECK(inner_map_global, op, &iv->lock, &ghead); \ + CHECK(inner_map_map, op, &iv->lock, &v->head); + +CHECK_OP(push_front); +CHECK_OP(push_back); +CHECK_OP(pop_front); +CHECK_OP(pop_back); + +#undef CHECK +#undef CHECK_OP +#undef INIT + +SEC("?kprobe/xyz") +int map_compat_kprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?kretprobe/xyz") +int map_compat_kretprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tracepoint/xyz") +int map_compat_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?perf_event") +int map_compat_perf(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp/xyz") +int map_compat_raw_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp.w/xyz") +int map_compat_raw_tp_w(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tc") +int obj_type_id_oor(void *ctx) +{ + bpf_obj_new_impl(~0UL, NULL); + return 0; +} + +SEC("?tc") +int obj_new_no_composite(void *ctx) +{ + bpf_obj_new_impl(bpf_core_type_id_local(int), (void *)42); + return 0; +} + +SEC("?tc") +int obj_new_no_struct(void *ctx) +{ + + bpf_obj_new(union { int data; unsigned udata; }); + return 0; +} + +SEC("?tc") +int obj_drop_non_zero_off(void *ctx) +{ + void *f; + + f = bpf_obj_new(struct foo); + if (!f) + return 0; + bpf_obj_drop(f+1); + return 0; +} + +SEC("?tc") +int new_null_ret(void *ctx) +{ + return bpf_obj_new(struct foo)->data; +} + +SEC("?tc") +int obj_new_acq(void *ctx) +{ + bpf_obj_new(struct foo); + return 0; +} + +SEC("?tc") +int use_after_drop(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_obj_drop(f); + return f->data; +} + +SEC("?tc") +int ptr_walk_scalar(void *ctx) +{ + struct test1 { + struct test2 { + struct test2 *next; + } *ptr; + } *p; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_this_cpu_ptr(p->ptr); + return 0; +} + +SEC("?tc") +int direct_read_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->lock; +} + +SEC("?tc") +int direct_write_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->lock = 0; + return 0; +} + +SEC("?tc") +int direct_read_head(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->head; +} + +SEC("?tc") +int direct_write_head(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->head = 0; + return 0; +} + +SEC("?tc") +int direct_read_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->node; +} + +SEC("?tc") +int direct_write_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->node = 0; + return 0; +} + +static __always_inline +int write_after_op(void (*push_op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + push_op(&ghead, &f->node); + f->data = 42; + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int write_after_push_front(void *ctx) +{ + return write_after_op((void *)bpf_list_push_front); +} + +SEC("?tc") +int write_after_push_back(void *ctx) +{ + return write_after_op((void *)bpf_list_push_back); +} + +static __always_inline +int use_after_unlock(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + f->data = 42; + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return f->data; +} + +SEC("?tc") +int use_after_unlock_push_front(void *ctx) +{ + return use_after_unlock((void *)bpf_list_push_front); +} + +SEC("?tc") +int use_after_unlock_push_back(void *ctx) +{ + return use_after_unlock((void *)bpf_list_push_back); +} + +static __always_inline +int list_double_add(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + op(&ghead, &f->node); + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int double_push_front(void *ctx) +{ + return list_double_add((void *)bpf_list_push_front); +} + +SEC("?tc") +int double_push_back(void *ctx) +{ + return list_double_add((void *)bpf_list_push_back); +} + +SEC("?tc") +int no_node_value_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(struct { int data; }); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, p); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_value_type(void *ctx) +{ + struct bar *b; + + b = bpf_obj_new(typeof(*b)); + if (!b) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &b->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_var_off(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + ctx->protocol); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off1(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + 1); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off2(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &f->node2); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int no_head_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(typeof(struct { int data; })); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(p, NULL); + bpf_spin_lock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off1(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off2(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off1(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + b = bpf_obj_new(typeof(*b)); + if (!b) { + bpf_obj_drop(f); + return 0; + } + + bpf_spin_lock(&f->lock); + bpf_list_push_front((void *)&f->head + 1, &b->node); + bpf_spin_unlock(&f->lock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off2(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&ghead + 1, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +static __always_inline +int pop_ptr_off(void *(*op)(void *head)) +{ + struct { + struct bpf_list_head head __contains(foo, node2); + struct bpf_spin_lock lock; + } *p; + struct bpf_list_node *n; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_spin_lock(&p->lock); + n = op(&p->head); + bpf_spin_unlock(&p->lock); + + bpf_this_cpu_ptr(n); + return 0; +} + +SEC("?tc") +int pop_front_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_front); +} + +SEC("?tc") +int pop_back_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_back); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c index 4f2d60b87b75..02c11d16b692 100644 --- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -7,6 +7,10 @@ char _license[] SEC("license") = "GPL"; +extern bool CONFIG_SECURITY_SELINUX __kconfig __weak; +extern bool CONFIG_SECURITY_SMACK __kconfig __weak; +extern bool CONFIG_SECURITY_APPARMOR __kconfig __weak; + #ifndef AF_PACKET #define AF_PACKET 17 #endif @@ -140,6 +144,10 @@ SEC("lsm_cgroup/sk_alloc_security") int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority) { called_socket_alloc++; + /* if already have non-bpf lsms installed, EPERM will cause memory leak of non-bpf lsms */ + if (CONFIG_SECURITY_SELINUX || CONFIG_SECURITY_SMACK || CONFIG_SECURITY_APPARMOR) + return 1; + if (family == AF_UNIX) return 0; /* EPERM */ diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 05e209b1b12a..760e41e1a632 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -3,6 +3,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> +#include "bpf_misc.h" struct map_value { char buf[8]; @@ -23,6 +24,7 @@ extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; SEC("?tc") +__failure __msg("kptr access size must be BPF_DW") int size_not_bpf_dw(struct __sk_buff *ctx) { struct map_value *v; @@ -37,6 +39,7 @@ int size_not_bpf_dw(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access cannot have variable offset") int non_const_var_off(struct __sk_buff *ctx) { struct map_value *v; @@ -55,6 +58,7 @@ int non_const_var_off(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 doesn't have constant offset. kptr has to be") int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) { struct map_value *v; @@ -73,6 +77,7 @@ int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access misaligned expected=8 off=7") int misaligned_access_write(struct __sk_buff *ctx) { struct map_value *v; @@ -88,6 +93,7 @@ int misaligned_access_write(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access misaligned expected=8 off=1") int misaligned_access_read(struct __sk_buff *ctx) { struct map_value *v; @@ -101,6 +107,7 @@ int misaligned_access_read(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("variable untrusted_ptr_ access var_off=(0x0; 0x1e0)") int reject_var_off_store(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -124,6 +131,7 @@ int reject_var_off_store(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc") int reject_bad_type_match(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -144,6 +152,7 @@ int reject_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") int marked_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -158,6 +167,7 @@ int marked_as_untrusted_or_null(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("access beyond struct prog_test_ref_kfunc at off 32 size 4") int correct_btf_id_check_size(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -175,6 +185,7 @@ int correct_btf_id_check_size(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_ expected=percpu_ptr_") int inherit_untrusted_on_walk(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -194,6 +205,7 @@ int inherit_untrusted_on_walk(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("off=8 kptr isn't referenced kptr") int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) { struct map_value *v; @@ -208,6 +220,7 @@ int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 expected pointer to map value") int reject_kptr_get_no_map_val(struct __sk_buff *ctx) { bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0); @@ -215,6 +228,7 @@ int reject_kptr_get_no_map_val(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 expected pointer to map value") int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) { bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0); @@ -222,6 +236,7 @@ int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 no referenced kptr at map value offset=0") int reject_kptr_get_no_kptr(struct __sk_buff *ctx) { struct map_value *v; @@ -236,6 +251,7 @@ int reject_kptr_get_no_kptr(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 no referenced kptr at map value offset=8") int reject_kptr_get_on_unref(struct __sk_buff *ctx) { struct map_value *v; @@ -250,6 +266,7 @@ int reject_kptr_get_on_unref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kernel function bpf_kfunc_call_test_kptr_get args#0") int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) { struct map_value *v; @@ -264,6 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -278,6 +296,7 @@ int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("store to referenced kptr disallowed") int reject_untrusted_store_to_ref(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -297,6 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R2 type=untrusted_ptr_ expected=ptr_") int reject_untrusted_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -315,6 +335,8 @@ int reject_untrusted_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure +__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member") int reject_bad_type_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *ref_ptr; @@ -333,6 +355,7 @@ int reject_bad_type_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc") int reject_member_of_ref_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *ref_ptr; @@ -351,6 +374,7 @@ int reject_member_of_ref_xchg(struct __sk_buff *ctx) } SEC("?syscall") +__failure __msg("kptr cannot be accessed indirectly by helper") int reject_indirect_helper_access(struct __sk_buff *ctx) { struct map_value *v; @@ -371,6 +395,7 @@ int write_func(int *p) } SEC("?tc") +__failure __msg("kptr cannot be accessed indirectly by helper") int reject_indirect_global_func_access(struct __sk_buff *ctx) { struct map_value *v; @@ -384,6 +409,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("Unreleased reference id=5 alloc_insn=") int kptr_xchg_ref_state(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -402,6 +428,7 @@ int kptr_xchg_ref_state(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("Unreleased reference id=3 alloc_insn=") int kptr_get_ref_state(struct __sk_buff *ctx) { struct map_value *v; diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c new file mode 100644 index 000000000000..125f908024d3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +__u32 user_data, key_serial, target_pid; +__u64 flags, task_storage_val, cgroup_id; + +struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +void bpf_key_put(struct bpf_key *key) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; +struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int get_cgroup_id(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* simulate bpf_get_current_cgroup_id() helper */ + bpf_rcu_read_lock(); + cgroups = task->cgroups; + if (!cgroups) + goto unlock; + cgroup_id = cgroups->dfl_cgrp->kn->id; +unlock: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_succ(void *ctx) +{ + struct task_struct *task, *real_parent; + long init_val = 2; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + bpf_rcu_read_lock(); + /* region including helper using rcu ptr real_parent */ + real_parent = task->real_parent; + if (!real_parent) + goto out; + ptr = bpf_task_storage_get(&map_a, real_parent, &init_val, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + goto out; + ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (!ptr) + goto out; + task_storage_val = *ptr; +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int no_lock(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* no bpf_rcu_read_lock(), old code still works */ + task = bpf_get_current_task_btf(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int two_regions(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* two regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_unlock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_1(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_2(void *ctx) +{ + struct task_struct *task, *real_parent; + + bpf_rcu_read_lock(); + task = bpf_get_current_task_btf(); + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int task_acquire(void *ctx) +{ + struct task_struct *task, *real_parent, *gparent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + + /* rcu_ptr->rcu_field */ + gparent = real_parent->real_parent; + if (!gparent) + goto out; + + /* acquire a reference which can be used outside rcu read lock region */ + gparent = bpf_task_acquire_not_zero(gparent); + if (!gparent) + /* Until we resolve the issues with using task->rcu_users, we + * expect bpf_task_acquire_not_zero() to return a NULL task. + * See the comment at the definition of + * bpf_task_acquire_not_zero() for more details. + */ + goto out; + + (void)bpf_task_storage_get(&map_a, gparent, 0, 0); + bpf_task_release(gparent); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_lock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_lock() */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_unlock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_unlock() */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_rcu_mismatch(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + /* non-sleepable: missing bpf_rcu_read_unlock() in one path */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (real_parent) + bpf_rcu_read_unlock(); +out: + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int inproper_sleepable_helper(void *ctx) +{ + struct task_struct *task, *real_parent; + struct pt_regs *regs; + __u32 value = 0; + void *ptr; + + task = bpf_get_current_task_btf(); + /* sleepable helper in rcu read lock region */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + regs = (struct pt_regs *)bpf_task_pt_regs(real_parent); + if (!regs) + goto out; + + ptr = (void *)PT_REGS_IP(regs); + (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0); + user_data = value; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?lsm.s/bpf") +int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct bpf_key *bkey; + + /* sleepable kfunc in rcu read lock region */ + bpf_rcu_read_lock(); + bkey = bpf_lookup_user_key(key_serial, flags); + bpf_rcu_read_unlock(); + if (!bkey) + return -1; + bpf_key_put(bkey); + + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int nested_rcu_region(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* nested rcu read lock regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_untrusted_non_rcuptr(void *ctx) +{ + struct task_struct *task, *last_wakee; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + /* the pointer last_wakee marked as untrusted */ + last_wakee = task->real_parent->last_wakee; + (void)bpf_task_storage_get(&map_a, last_wakee, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_untrusted_rcuptr(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + bpf_rcu_read_unlock(); + /* helper use of rcu ptr outside the rcu read lock region */ + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int cross_rcu_region(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* rcu ptr define/use in different regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + bpf_rcu_read_unlock(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h new file mode 100644 index 000000000000..c0ffd171743e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _TASK_KFUNC_COMMON_H +#define _TASK_KFUNC_COMMON_H + +#include <errno.h> +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct __tasks_kfunc_map_value { + struct task_struct __kptr_ref * task; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __tasks_kfunc_map_value); + __uint(max_entries, 1); +} __tasks_kfunc_map SEC(".maps"); + +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; +struct task_struct *bpf_task_from_pid(s32 pid) __ksym; + +static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p) +{ + s32 pid; + long status; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); +} + +static inline int tasks_kfunc_map_insert(struct task_struct *p) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return status; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) { + bpf_map_delete_elem(&__tasks_kfunc_map, &pid); + return -ENOENT; + } + + acquired = bpf_task_acquire(p); + old = bpf_kptr_xchg(&v->task, acquired); + if (old) { + bpf_task_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _TASK_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c new file mode 100644 index 000000000000..87fa1db9d9b5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task) +{ + int status; + + status = tasks_kfunc_map_insert(task); + if (status) + return NULL; + + return tasks_kfunc_map_value_lookup(task); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_acquire() on an untrusted pointer. */ + acquired = bpf_task_acquire(v->task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags; + + /* Can't invoke bpf_task_acquire() on a random frame pointer. */ + acquired = bpf_task_acquire((struct task_struct *)&stack_task); + bpf_task_release(acquired); + + return 0; +} + +SEC("kretprobe/free_task") +int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + /* Can't release a bpf_task_acquire()'d task without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */ + acquired = bpf_task_acquire(task->last_wakee); + bpf_task_release(acquired); + + return 0; +} + + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a NULL pointer. */ + acquired = bpf_task_acquire(NULL); + if (!acquired) + return 0; + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + + /* Acquired task is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */ + kptr = bpf_task_kptr_get(&task); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr, *acquired; + + acquired = bpf_task_acquire(task); + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */ + kptr = bpf_task_kptr_get(&acquired); + bpf_task_release(acquired); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a NULL pointer. */ + kptr = bpf_task_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_task_kptr_get(&v->task); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_release() on an untrusted pointer. */ + bpf_task_release(v->task); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired = (struct task_struct *)&clone_flags; + + /* Cannot release random frame pointer. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid); + if (status) + return 0; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) + return -ENOENT; + + acquired = bpf_task_acquire(task); + + old = bpf_kptr_xchg(&v->task, acquired); + + /* old cannot be passed to bpf_task_release() without a NULL check. */ + bpf_task_release(old); + bpf_task_release(old); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags) +{ + /* Cannot release trusted task pointer which was not acquired. */ + bpf_task_release(task); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(task->pid); + + /* Releasing bpf_task_from_pid() lookup without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("lsm/task_free") +int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) +{ + struct task_struct *acquired; + + /* the argument of lsm task_free hook is untrusted. */ + acquired = bpf_task_acquire(task); + bpf_task_release(acquired); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c new file mode 100644 index 000000000000..9f359cfd29e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + return pid == cur_pid; +} + +static int test_acquire_release(struct task_struct *task) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(task); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(bpf_get_current_task_btf()); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_task_kptr_get(&v->task); + if (kptr) { + /* Until we resolve the issues with using task->rcu_users, we + * expect bpf_task_kptr_get() to return a NULL task. See the + * comment at the definition of bpf_task_acquire_not_zero() for + * more details. + */ + bpf_task_release(kptr); + err = 3; + return 0; + } + + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *current, *acquired; + + if (!is_test_kfunc_task()) + return 0; + + current = bpf_get_current_task_btf(); + acquired = bpf_task_acquire(current); + bpf_task_release(acquired); + + return 0; +} + +static void lookup_compare_pid(const struct task_struct *p) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(p->pid); + if (!acquired) { + err = 1; + return; + } + + if (acquired->pid != p->pid) + err = 2; + bpf_task_release(acquired); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + if (!is_test_kfunc_task()) + return 0; + + lookup_compare_pid(task); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *current, *acquired; + + if (!is_test_kfunc_task()) + return 0; + + lookup_compare_pid(bpf_get_current_task_btf()); + return 0; +} + +static int is_pid_lookup_valid(s32 pid) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(pid); + if (acquired) { + bpf_task_release(acquired); + return 1; + } + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + if (!is_test_kfunc_task()) + return 0; + + if (is_pid_lookup_valid(-1)) { + err = 1; + return 0; + } + + if (is_pid_lookup_valid(0xcafef00d)) { + err = 2; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c index 81758c0aef99..41d88ed222ff 100644 --- a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c +++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c @@ -14,6 +14,7 @@ struct { __type(value, __u64); } task_storage SEC(".maps"); +int run_count = 0; int valid_ptr_count = 0; int null_ptr_count = 0; @@ -28,5 +29,7 @@ int BPF_PROG(trace_exit_creds, struct task_struct *task) __sync_fetch_and_add(&valid_ptr_count, 1); else __sync_fetch_and_add(&null_ptr_count, 1); + + __sync_fetch_and_add(&run_count, 1); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c index 564583dca7c8..4542dc683b44 100644 --- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -5,7 +5,13 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#ifndef EBUSY +#define EBUSY 16 +#endif + char _license[] SEC("license") = "GPL"; +int nr_del_errs = 0; +int test_pid = 0; struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); @@ -26,6 +32,13 @@ int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + /* The bpf_task_storage_delete will call + * bpf_local_storage_lookup. The prog->active will + * stop the recursion. + */ bpf_task_storage_delete(&map_a, task); bpf_task_storage_delete(&map_b, task); return 0; @@ -37,11 +50,32 @@ int BPF_PROG(on_update) struct task_struct *task = bpf_get_current_task_btf(); long *ptr; + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + /* ptr will not be NULL when it is called from + * the bpf_task_storage_get(&map_b,...F_CREATE) in + * the BPF_PROG(on_enter) below. It is because + * the value can be found in map_a and the kernel + * does not need to acquire any spin_lock. + */ + if (ptr) { + int err; + *ptr += 1; + err = bpf_task_storage_delete(&map_a, task); + if (err == -EBUSY) + nr_del_errs++; + } + /* This will still fail because map_b is empty and + * this BPF_PROG(on_update) has failed to acquire + * the percpu busy lock => meaning potential + * deadlock is detected and it will fail to create + * new storage. + */ ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) @@ -57,14 +91,17 @@ int BPF_PROG(on_enter, struct pt_regs *regs, long id) long *ptr; task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 200; ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 100; return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c new file mode 100644 index 000000000000..ea2dbb80f7b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#ifndef EBUSY +#define EBUSY 16 +#endif + +extern bool CONFIG_PREEMPT __kconfig __weak; +int nr_get_errs = 0; +int nr_del_errs = 0; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_storage SEC(".maps"); + +SEC("lsm.s/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct task_struct *task; + int ret, zero = 0; + int *value; + + if (!CONFIG_PREEMPT) + return 0; + + task = bpf_get_current_task_btf(); + value = bpf_task_storage_get(&task_storage, task, &zero, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!value) + __sync_fetch_and_add(&nr_get_errs, 1); + + ret = bpf_task_storage_delete(&task_storage, + bpf_get_current_task_btf()); + if (ret == -EBUSY) + __sync_fetch_and_add(&nr_del_errs, 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index ce39d096bba3..f4a8250329b2 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -33,18 +33,6 @@ int err, pid; char _license[] SEC("license") = "GPL"; SEC("?lsm.s/bpf") -int BPF_PROG(dynptr_type_not_supp, int cmd, union bpf_attr *attr, - unsigned int size) -{ - char write_data[64] = "hello there, world!!"; - struct bpf_dynptr ptr; - - bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); - - return bpf_verify_pkcs7_signature(&ptr, &ptr, NULL); -} - -SEC("?lsm.s/bpf") int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val; diff --git a/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c new file mode 100644 index 000000000000..f5ac5f3e8919 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* From include/linux/mm.h. */ +#define FMODE_WRITE 0x2 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} data_input SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +SEC("lsm/bpf_map") +int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) +{ + if (map != (struct bpf_map *)&data_input) + return 0; + + if (fmode & FMODE_WRITE) + return -EACCES; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 2c121c5d66a7..d487153a839d 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -27,6 +27,7 @@ unsigned int nr_pure_ack = 0; unsigned int nr_data = 0; unsigned int nr_syn = 0; unsigned int nr_fin = 0; +unsigned int nr_hwtstamp = 0; /* Check the header received from the active side */ static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) @@ -146,6 +147,9 @@ static int check_active_hdr_in(struct bpf_sock_ops *skops) if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len) nr_pure_ack++; + if (skops->skb_hwtstamp) + nr_hwtstamp++; + return CG_OK; } diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 08628afedb77..8a1b50f3a002 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -110,4 +110,10 @@ int BPF_PROG(handle_fmod_ret, return 0; /* don't override the exit code */ } +SEC("kprobe.multi/bpf_testmod_test_read") +int BPF_PROG(kprobe_multi) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c new file mode 100644 index 000000000000..2760bf60d05a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1000); + __type(key, struct sample); + __type(value, int); +} hash_map SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* inner state */ +long seq = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_mem_map_key(void *ctx) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + struct sample *sample, sample_copy; + int *lookup_val; + + if (cur_pid != pid) + return 0; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); + if (!sample) + return 0; + + sample->pid = pid; + bpf_get_current_comm(sample->comm, sizeof(sample->comm)); + sample->seq = ++seq; + sample->value = 42; + + /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg + */ + lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample); + + /* workaround - memcpy is necessary so that verifier doesn't + * complain with: + * verifier internal error: more than one arg with ref_obj_id R3 + * when trying to do bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY); + * + * Since bpf_map_lookup_elem above uses 'sample' as key, test using + * sample field as value below + */ + __builtin_memcpy(&sample_copy, sample, sizeof(struct sample)); + bpf_map_update_elem(&hash_map, &sample_copy, &sample->seq, BPF_ANY); + + bpf_ringbuf_submit(sample, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1a4e93f6d9df..adece9f91f58 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -53,6 +53,20 @@ int out_mostly_var; char huge_arr[16 * 1024 * 1024]; +/* non-mmapable custom .data section */ + +struct my_value { int x, y, z; }; + +__hidden int zero_key SEC(".data.non_mmapable"); +static struct my_value zero_value SEC(".data.non_mmapable"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct my_value); + __uint(max_entries, 1); +} my_map SEC(".maps"); + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -75,6 +89,9 @@ int handler(const void *ctx) huge_arr[sizeof(huge_arr) - 1] = 123; + /* make sure zero_key and zero_value are not optimized out */ + bpf_map_update_elem(&my_map, &zero_key, &zero_value, BPF_ANY); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 7e88309d3229..5bd10409285b 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -45,8 +45,8 @@ struct { #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) -SEC("tc") -int bpf_sping_lock_test(struct __sk_buff *skb) +SEC("cgroup_skb/ingress") +int bpf_spin_lock_test(struct __sk_buff *skb) { volatile int credit = 0, max_credit = 100, pkt_len = 64; struct hmap_elem zero = {}, *val; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c new file mode 100644 index 000000000000..86cd183ef6dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +struct foo { + struct bpf_spin_lock lock; + int data; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct foo); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &array_map, + }, +}; + +SEC(".data.A") struct bpf_spin_lock lockA; +SEC(".data.B") struct bpf_spin_lock lockB; + +SEC("?tc") +int lock_id_kptr_preserve(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_global_zero(void *ctx) +{ + bpf_this_cpu_ptr(&lockA); + return 0; +} + +SEC("?tc") +int lock_id_mapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + + f = bpf_map_lookup_elem(&array_map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_innermapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f = bpf_map_lookup_elem(map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +#define CHECK(test, A, B) \ + SEC("?tc") \ + int lock_id_mismatch_##test(void *ctx) \ + { \ + struct foo *f1, *f2, *v, *iv; \ + int key = 0; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &key); \ + if (!map) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &key); \ + if (!iv) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &key); \ + if (!v) \ + return 0; \ + f1 = bpf_obj_new(typeof(*f1)); \ + if (!f1) \ + return 0; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + bpf_spin_lock(A); \ + bpf_spin_unlock(B); \ + return 0; \ + } + +CHECK(kptr_kptr, &f1->lock, &f2->lock); +CHECK(kptr_global, &f1->lock, &lockA); +CHECK(kptr_mapval, &f1->lock, &v->lock); +CHECK(kptr_innermapval, &f1->lock, &iv->lock); + +CHECK(global_global, &lockA, &lockB); +CHECK(global_kptr, &lockA, &f1->lock); +CHECK(global_mapval, &lockA, &v->lock); +CHECK(global_innermapval, &lockA, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_mapval_mapval(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + + f1 = bpf_map_lookup_elem(&array_map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(&array_map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(mapval_kptr, &v->lock, &f1->lock); +CHECK(mapval_global, &v->lock, &lockB); +CHECK(mapval_innermapval, &v->lock, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval1(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval2(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(innermapval_kptr, &iv->lock, &f1->lock); +CHECK(innermapval_global, &iv->lock, &lockA); +CHECK(innermapval_mapval, &iv->lock, &v->lock); + +#undef CHECK + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c new file mode 100644 index 000000000000..eb78e6f03129 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} enter_id SEC(".maps"); + +#define IFNAMSIZ 16 + +int ifindex, ingress_ifindex; +char name[IFNAMSIZ]; +unsigned int inum; +unsigned int meta_len, frag0_len, kskb_len, kskb2_len; + +void *bpf_cast_to_kern_ctx(void *) __ksym; +void *bpf_rdonly_cast(void *, __u32) __ksym; + +SEC("?xdp") +int md_xdp(struct xdp_md *ctx) +{ + struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx); + struct net_device *dev; + + dev = kctx->rxq->dev; + ifindex = dev->ifindex; + inum = dev->nd_net.net->ns.inum; + __builtin_memcpy(name, dev->name, IFNAMSIZ); + ingress_ifindex = ctx->ingress_ifindex; + return XDP_PASS; +} + +SEC("?tc") +int md_skb(struct __sk_buff *skb) +{ + struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb); + struct skb_shared_info *shared_info; + struct sk_buff *kskb2; + + kskb_len = kskb->len; + + /* Simulate the following kernel macro: + * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) + */ + shared_info = bpf_rdonly_cast(kskb->head + kskb->end, + bpf_core_type_id_kernel(struct skb_shared_info)); + meta_len = shared_info->meta_len; + frag0_len = shared_info->frag_list->len; + + /* kskb2 should be equal to kskb */ + kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff)); + kskb2_len = kskb2->len; + return 0; +} + +SEC("?tp_btf/sys_enter") +int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) +{ + struct task_struct *task, *task_dup; + long *ptr; + + task = bpf_get_current_task_btf(); + task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); + (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0); + return 0; +} + +SEC("?tracepoint/syscalls/sys_enter_nanosleep") +int kctx_u64(void *ctx) +{ + u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64)); + + (void)kctx; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 82aba4529aa9..f3201dc69a60 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -18,6 +18,13 @@ struct { __uint(type, BPF_MAP_TYPE_USER_RINGBUF); } user_ringbuf SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 2); +} ringbuf SEC(".maps"); + +static int map_value; + static long bad_access1(struct bpf_dynptr *dynptr, void *context) { @@ -32,7 +39,7 @@ bad_access1(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read before the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_bad_access1(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0); @@ -54,7 +61,7 @@ bad_access2(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_bad_access2(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0); @@ -73,7 +80,7 @@ write_forbidden(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_write_forbidden(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0); @@ -92,7 +99,7 @@ null_context_write(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_null_context_write(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0); @@ -113,7 +120,7 @@ null_context_read(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_null_context_read(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0); @@ -132,7 +139,7 @@ try_discard_dynptr(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_discard_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0); @@ -151,7 +158,7 @@ try_submit_dynptr(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_submit_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0); @@ -168,10 +175,38 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_invalid_return(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); return 0; } + +static long +try_reinit_dynptr_mem(struct bpf_dynptr *dynptr, void *context) +{ + bpf_dynptr_from_mem(&map_value, 4, 0, dynptr); + return 0; +} + +static long +try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context) +{ + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr); + return 0; +} + +SEC("?raw_tp/") +int user_ringbuf_callback_reinit_dynptr_mem(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0); + return 0; +} + +SEC("?raw_tp/") +int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c new file mode 100644 index 000000000000..f6a501fbba2b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> + +struct bpf_xfrm_info___local { + u32 if_id; + int link; +} __attribute__((preserve_access_index)); + +__u32 req_if_id; +__u32 resp_if_id; + +int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, + const struct bpf_xfrm_info___local *from) __ksym; +int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, + struct bpf_xfrm_info___local *to) __ksym; + +SEC("tc") +int set_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info___local info = { .if_id = req_if_id }; + + return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC; +} + +SEC("tc") +int get_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info___local info = {}; + + if (bpf_skb_get_xfrm_info(skb, &info) < 0) + return TC_ACT_SHOT; + + resp_if_id = info.if_id; + + return TC_ACT_UNSPEC; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/task_local_storage_helpers.h b/tools/testing/selftests/bpf/task_local_storage_helpers.h index 711d5abb7d51..281f86132766 100644 --- a/tools/testing/selftests/bpf/task_local_storage_helpers.h +++ b/tools/testing/selftests/bpf/task_local_storage_helpers.h @@ -7,8 +7,12 @@ #include <sys/types.h> #ifndef __NR_pidfd_open +#ifdef __alpha__ +#define __NR_pidfd_open 544 +#else #define __NR_pidfd_open 434 #endif +#endif static inline int sys_pidfd_open(pid_t pid, unsigned int flags) { diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh index 1bf81b49457a..b5520692f41b 100755 --- a/tools/testing/selftests/bpf/test_bpftool_metadata.sh +++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh @@ -4,6 +4,9 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +BPF_FILE_USED="metadata_used.bpf.o" +BPF_FILE_UNUSED="metadata_unused.bpf.o" + TESTNAME=bpftool_metadata BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) BPF_DIR=$BPF_FS/test_$TESTNAME @@ -55,7 +58,7 @@ mkdir $BPF_DIR trap cleanup EXIT -bpftool prog load metadata_unused.o $BPF_DIR/unused +bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null @@ -67,7 +70,7 @@ bpftool map | grep 'metadata.rodata' > /dev/null rm $BPF_DIR/unused -bpftool prog load metadata_used.o $BPF_DIR/used +bpftool prog load $BPF_FILE_USED $BPF_DIR/used METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index a6410bebe603..0cfece7ff4f8 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -309,11 +309,11 @@ class MainHeaderFileExtractor(SourceFileExtractor): commands), which looks to the lists of options in other source files but has different start and end markers: - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile(f'"OPTIONS :=') pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') @@ -336,7 +336,7 @@ class ManSubstitutionsExtractor(SourceFileExtractor): Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') pattern = re.compile('\*\*([\w/-]+)\*\*') @@ -501,6 +501,14 @@ def main(): source_map_types = set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE + # share the same enum value and source_map_types picks + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED/cgroup_storage_deprecated. + # Replace 'cgroup_storage_deprecated' with 'cgroup_storage' + # so it aligns with what `bpftool map help` shows. + source_map_types.remove('cgroup_storage_deprecated') + source_map_types.add('cgroup_storage') + help_map_types = map_info.get_map_help() help_map_options = map_info.get_options() map_info.close() diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index 19ad172036da..0bd9990e83fa 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #include <iostream> -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include <unistd.h> +#include <linux/bpf.h> +#include <linux/btf.h> #include <bpf/libbpf.h> -#pragma GCC diagnostic pop #include <bpf/bpf.h> #include <bpf/btf.h> #include "test_core_extern.skel.h" @@ -99,6 +99,7 @@ int main(int argc, char *argv[]) struct btf_dump_opts opts = { }; struct test_core_extern *skel; struct btf *btf; + int fd; try_skeleton_template(); @@ -117,6 +118,12 @@ int main(int argc, char *argv[]) skel = test_core_extern__open_and_load(); test_core_extern__destroy(skel); + fd = bpf_enable_stats(BPF_STATS_RUN_TIME); + if (fd < 0) + std::cout << "FAILED to enable stats: " << fd << std::endl; + else + ::close(fd); + std::cout << "DONE!" << std::endl; return 0; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index 5303ce0c977b..4b298863797a 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # # Load BPF flow dissector and verify it correctly dissects traffic + +BPF_FILE="bpf_flow.bpf.o" export TESTNAME=test_flow_dissector unmount=0 @@ -22,7 +24,7 @@ if [[ -z $(ip netns identify $$) ]]; then if bpftool="$(which bpftool)"; then echo "Testing global flow dissector..." - $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \ type flow_dissector if ! unshare --net $bpftool prog attach pinned \ @@ -95,7 +97,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s _dissect +./flow_dissector_load -p $BPF_FILE -s _dissect # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c new file mode 100644 index 000000000000..679efb3aa785 --- /dev/null +++ b/tools/testing/selftests/bpf/test_loader.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <stdlib.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#define str_has_pfx(str, pfx) \ + (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) + +#define TEST_LOADER_LOG_BUF_SZ 1048576 + +#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" +#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" +#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" + +struct test_spec { + const char *name; + bool expect_failure; + const char *expect_msg; + int log_level; +}; + +static int tester_init(struct test_loader *tester) +{ + if (!tester->log_buf) { + tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ; + tester->log_buf = malloc(tester->log_buf_sz); + if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf")) + return -ENOMEM; + } + + return 0; +} + +void test_loader_fini(struct test_loader *tester) +{ + if (!tester) + return; + + free(tester->log_buf); +} + +static int parse_test_spec(struct test_loader *tester, + struct bpf_object *obj, + struct bpf_program *prog, + struct test_spec *spec) +{ + struct btf *btf; + int func_id, i; + + memset(spec, 0, sizeof(*spec)); + + spec->name = bpf_program__name(prog); + + btf = bpf_object__btf(obj); + if (!btf) { + ASSERT_FAIL("BPF object has no BTF"); + return -EINVAL; + } + + func_id = btf__find_by_name_kind(btf, spec->name, BTF_KIND_FUNC); + if (func_id < 0) { + ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->name); + return -EINVAL; + } + + for (i = 1; i < btf__type_cnt(btf); i++) { + const struct btf_type *t; + const char *s; + + t = btf__type_by_id(btf, i); + if (!btf_is_decl_tag(t)) + continue; + + if (t->type != func_id || btf_decl_tag(t)->component_idx != -1) + continue; + + s = btf__str_by_offset(btf, t->name_off); + if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) { + spec->expect_failure = true; + } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) { + spec->expect_failure = false; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { + spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { + errno = 0; + spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0); + if (errno) { + ASSERT_FAIL("failed to parse test log level from '%s'", s); + return -EINVAL; + } + } + } + + return 0; +} + +static void prepare_case(struct test_loader *tester, + struct test_spec *spec, + struct bpf_object *obj, + struct bpf_program *prog) +{ + int min_log_level = 0; + + if (env.verbosity > VERBOSE_NONE) + min_log_level = 1; + if (env.verbosity > VERBOSE_VERY) + min_log_level = 2; + + bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz); + + /* Make sure we set at least minimal log level, unless test requirest + * even higher level already. Make sure to preserve independent log + * level 4 (verifier stats), though. + */ + if ((spec->log_level & 3) < min_log_level) + bpf_program__set_log_level(prog, (spec->log_level & 4) | min_log_level); + else + bpf_program__set_log_level(prog, spec->log_level); + + tester->log_buf[0] = '\0'; +} + +static void emit_verifier_log(const char *log_buf, bool force) +{ + if (!force && env.verbosity == VERBOSE_NONE) + return; + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log_buf); +} + +static void validate_case(struct test_loader *tester, + struct test_spec *spec, + struct bpf_object *obj, + struct bpf_program *prog, + int load_err) +{ + if (spec->expect_msg) { + char *match; + + match = strstr(tester->log_buf, spec->expect_msg); + if (!ASSERT_OK_PTR(match, "expect_msg")) { + /* if we are in verbose mode, we've already emitted log */ + if (env.verbosity == VERBOSE_NONE) + emit_verifier_log(tester->log_buf, true /*force*/); + fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg); + return; + } + } +} + +/* this function is forced noinline and has short generic name to look better + * in test_progs output (in case of a failure) + */ +static noinline +void run_subtest(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory) +{ + LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name); + struct bpf_object *obj = NULL, *tobj; + struct bpf_program *prog, *tprog; + const void *obj_bytes; + size_t obj_byte_cnt; + int err; + + if (tester_init(tester) < 0) + return; /* failed to initialize tester */ + + obj_bytes = elf_bytes_factory(&obj_byte_cnt); + obj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts); + if (!ASSERT_OK_PTR(obj, "obj_open_mem")) + return; + + bpf_object__for_each_program(prog, obj) { + const char *prog_name = bpf_program__name(prog); + struct test_spec spec; + + if (!test__start_subtest(prog_name)) + continue; + + /* if we can't derive test specification, go to the next test */ + err = parse_test_spec(tester, obj, prog, &spec); + if (!ASSERT_OK(err, "parse_test_spec")) + continue; + + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts); + if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ + continue; + + bpf_object__for_each_program(tprog, tobj) + bpf_program__set_autoload(tprog, false); + + bpf_object__for_each_program(tprog, tobj) { + /* only load specified program */ + if (strcmp(bpf_program__name(tprog), prog_name) == 0) { + bpf_program__set_autoload(tprog, true); + break; + } + } + + prepare_case(tester, &spec, tobj, tprog); + + err = bpf_object__load(tobj); + if (spec.expect_failure) { + if (!ASSERT_ERR(err, "unexpected_load_success")) { + emit_verifier_log(tester->log_buf, false /*force*/); + goto tobj_cleanup; + } + } else { + if (!ASSERT_OK(err, "unexpected_load_failure")) { + emit_verifier_log(tester->log_buf, true /*force*/); + goto tobj_cleanup; + } + } + + emit_verifier_log(tester->log_buf, false /*force*/); + validate_case(tester, &spec, tobj, tprog, err); + +tobj_cleanup: + bpf_object__close(tobj); + } + + bpf_object__close(obj); +} + +void test_loader__run_subtests(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory) +{ + /* see comment in run_subtest() for why we do this function nesting */ + run_subtest(tester, skel_name, elf_bytes_factory); +} diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 6c69c42b1d60..1e565f47aca9 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -38,6 +38,7 @@ # ping: SRC->[encap at veth2:ingress]->GRE:decap->DST # ping replies go DST->SRC directly +BPF_FILE="test_lwt_ip_encap.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -373,14 +374,14 @@ test_egress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} else echo " unknown encap ${ENCAP}" TEST_STATUS=1 @@ -431,14 +432,14 @@ test_ingress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} else echo "FAIL: unknown encap ${ENCAP}" TEST_STATUS=1 diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 826f4423ce02..0efea2292d6a 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -23,6 +23,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +BPF_FILE="test_lwt_seg6local.bpf.o" readonly NS1="ns1-$(mktemp -u XXXXXX)" readonly NS2="ns2-$(mktemp -u XXXXXX)" readonly NS3="ns3-$(mktemp -u XXXXXX)" @@ -117,18 +118,18 @@ ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 -ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2 ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 +ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4 -ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6 ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 +ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8 ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 7fc15e0d24a9..7cb1bc05e5cf 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -769,12 +769,14 @@ skip(ret != 0, "bpftool not installed") base_progs = progs _, base_maps = bpftool("map") base_map_names = [ - 'pid_iter.rodata' # created on each bpftool invocation + 'pid_iter.rodata', # created on each bpftool invocation + 'libbpf_det_bind', # created on each bpftool invocation ] # Check netdevsim -ret, out = cmd("modprobe netdevsim", fail=False) -skip(ret != 0, "netdevsim module could not be loaded") +if not os.path.isdir("/sys/bus/netdevsim/"): + ret, out = cmd("modprobe netdevsim", fail=False) + skip(ret != 0, "netdevsim module could not be loaded") # Check debugfs _, out = cmd("mount") diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 3fef451d8831..4716e38e153a 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -222,6 +222,26 @@ static char *test_result(bool failed, bool skipped) return failed ? "FAIL" : (skipped ? "SKIP" : "OK"); } +#define TEST_NUM_WIDTH 7 + +static void print_test_result(const struct prog_test_def *test, const struct test_state *test_state) +{ + int skipped_cnt = test_state->skip_cnt; + int subtests_cnt = test_state->subtest_num; + + fprintf(env.stdout, "#%-*d %s:", TEST_NUM_WIDTH, test->test_num, test->test_name); + if (test_state->error_cnt) + fprintf(env.stdout, "FAIL"); + else if (!skipped_cnt) + fprintf(env.stdout, "OK"); + else if (skipped_cnt == subtests_cnt || !subtests_cnt) + fprintf(env.stdout, "SKIP"); + else + fprintf(env.stdout, "OK (SKIP: %d/%d)", skipped_cnt, subtests_cnt); + + fprintf(env.stdout, "\n"); +} + static void print_test_log(char *log_buf, size_t log_cnt) { log_buf[log_cnt] = '\0'; @@ -230,18 +250,6 @@ static void print_test_log(char *log_buf, size_t log_cnt) fprintf(env.stdout, "\n"); } -#define TEST_NUM_WIDTH 7 - -static void print_test_name(int test_num, const char *test_name, char *result) -{ - fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name); - - if (result) - fprintf(env.stdout, ":%s", result); - - fprintf(env.stdout, "\n"); -} - static void print_subtest_name(int test_num, int subtest_num, const char *test_name, char *subtest_name, char *result) @@ -307,8 +315,7 @@ static void dump_test_log(const struct prog_test_def *test, subtest_state->skipped)); } - print_test_name(test->test_num, test->test_name, - test_result(test_failed, test_state->skip_cnt)); + print_test_result(test, test_state); } static void stdio_restore(void); @@ -1070,8 +1077,7 @@ static void run_one_test(int test_num) state->tested = true; if (verbose() && env.worker_id == -1) - print_test_name(test_num + 1, test->test_name, - test_result(state->error_cnt, state->skip_cnt)); + print_test_result(test, state); reset_affinity(); restore_netns(); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index b090996daee5..3f058dfadbaf 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TEST_PROGS_H +#define __TEST_PROGS_H + #include <stdio.h> #include <unistd.h> #include <errno.h> @@ -210,6 +213,12 @@ int test__join_cgroup(const char *path); #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) +#define ASSERT_FAIL(fmt, args...) ({ \ + static int duration = 0; \ + CHECK(false, "", fmt"\n", ##args); \ + false; \ +}) + #define ASSERT_TRUE(actual, name) ({ \ static int duration = 0; \ bool ___ok = (actual); \ @@ -397,3 +406,27 @@ int write_sysctl(const char *sysctl, const char *value); #endif #define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod" + +struct test_loader { + char *log_buf; + size_t log_buf_sz; + + struct bpf_object *obj; +}; + +typedef const void *(*skel_elf_bytes_fn)(size_t *sz); + +extern void test_loader__run_subtests(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory); + +extern void test_loader_fini(struct test_loader *tester); + +#define RUN_TESTS(skel) ({ \ + struct test_loader tester = {}; \ + \ + test_loader__run_subtests(&tester, #skel, skel##__elf_bytes); \ + test_loader_fini(&tester); \ +}) + +#endif /* __TEST_PROGS_H */ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index e768181a1bd7..024a0faafb3b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1690,24 +1690,42 @@ static void test_txmsg_apply(int cgrp, struct sockmap_options *opt) { txmsg_pass = 1; txmsg_redir = 0; + txmsg_ingress = 0; txmsg_apply = 1; txmsg_cork = 0; test_send_one(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; + txmsg_ingress = 0; + txmsg_apply = 1; + txmsg_cork = 0; + test_send_one(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_ingress = 1; txmsg_apply = 1; txmsg_cork = 0; test_send_one(opt, cgrp); txmsg_pass = 1; txmsg_redir = 0; + txmsg_ingress = 0; + txmsg_apply = 1024; + txmsg_cork = 0; + test_send_large(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_ingress = 0; txmsg_apply = 1024; txmsg_cork = 0; test_send_large(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; + txmsg_ingress = 1; txmsg_apply = 1024; txmsg_cork = 0; test_send_large(opt, cgrp); diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh index daa7d1b8d309..76f0bd17061f 100755 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -5,6 +5,7 @@ # with dst port = 9000 down to 5MBps. Then it measures actual # throughput of the flow. +BPF_FILE="test_tc_edt.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -54,7 +55,7 @@ ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ - bpf da obj test_tc_edt.o sec cls_test + bpf da obj ${BPF_FILE} sec cls_test # start the listener diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 088fcad138c9..334bdfeab940 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -3,6 +3,7 @@ # # In-place tunneling +BPF_FILE="test_tc_tunnel.bpf.o" # must match the port that the bpf program filters on readonly port=8000 @@ -196,7 +197,7 @@ verify_data # client can no longer connect ip netns exec "${ns1}" tc qdisc add dev veth1 clsact ip netns exec "${ns1}" tc filter add dev veth1 egress \ - bpf direct-action object-file ./test_tc_tunnel.o \ + bpf direct-action object-file ${BPF_FILE} \ section "encap_${tuntype}_${mac}" echo "test bpf encap without decap (expect failure)" server_listen @@ -296,7 +297,7 @@ fi ip netns exec "${ns2}" ip link del dev testtun0 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact ip netns exec "${ns2}" tc filter add dev veth2 ingress \ - bpf direct-action object-file ./test_tc_tunnel.o section decap + bpf direct-action object-file ${BPF_FILE} section decap echo "test bpf encap with bpf decap" client_connect verify_data diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index e9ebc67d73f7..2eaedc1d9ed3 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -45,6 +45,7 @@ # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet # 6) Forward the packet to the overlay tnl dev +BPF_FILE="test_tunnel_kern.bpf.o" BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" PING_ARG="-c 3 -w 10 -q" ret=0 @@ -545,7 +546,7 @@ test_xfrm_tunnel() > /sys/kernel/debug/tracing/trace setup_xfrm_tunnel mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} tc qdisc add dev veth1 clsact tc filter add dev veth1 proto ip ingress bpf da object-pinned \ ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state @@ -572,7 +573,7 @@ attach_bpf() SET=$2 GET=$3 mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/ + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/ tc qdisc add dev $DEV clsact tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b605a70d4f6b..8c808551dfd7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -68,7 +68,6 @@ #define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) #define DEFAULT_LIBBPF_LOG_LEVEL 4 -#define VERBOSE_LIBBPF_LOG_LEVEL 1 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -81,6 +80,7 @@ static bool unpriv_disabled = false; static int skips; static bool verbose = false; +static int verif_log_level = 0; struct kfunc_btf_id_pair { const char *kfunc; @@ -759,7 +759,7 @@ static int load_btf_spec(__u32 *types, int types_len, .log_buf = bpf_vlog, .log_size = sizeof(bpf_vlog), .log_level = (verbose - ? VERBOSE_LIBBPF_LOG_LEVEL + ? verif_log_level : DEFAULT_LIBBPF_LOG_LEVEL), ); @@ -1491,7 +1491,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.expected_attach_type = test->expected_attach_type; if (verbose) - opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL; + opts.log_level = verif_log_level | 4; /* force stats */ else if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; else @@ -1746,6 +1746,13 @@ int main(int argc, char **argv) if (argc > 1 && strcmp(argv[1], "-v") == 0) { arg++; verbose = true; + verif_log_level = 1; + argc--; + } + if (argc > 1 && strcmp(argv[1], "-vv") == 0) { + arg++; + verbose = true; + verif_log_level = 2; argc--; } diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index ea69370caae3..2740322c1878 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -1,5 +1,6 @@ #!/bin/sh +BPF_FILE="test_xdp_meta.bpf.o" # Kselftest framework requirement - SKIP code is 4. readonly KSFT_SKIP=4 readonly NS1="ns1-$(mktemp -u XXXXXX)" @@ -42,11 +43,11 @@ ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 ip netns exec ${NS1} tc qdisc add dev veth1 clsact ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x +ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x ip netns exec ${NS1} ip link set dev veth1 up ip netns exec ${NS2} ip link set dev veth2 up diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index 810c407e0286..fbcaa9f0120b 100755 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -200,11 +200,11 @@ ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First p # ---------------------------------------------------------------------- # In ns1: ingress use XDP to remove VLAN tags export DEVNS1=veth1 -export FILE=test_xdp_vlan.o +export BPF_FILE=test_xdp_vlan.bpf.o # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" export XDP_PROG=xdp_vlan_change -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # In ns1: egress use TC to add back VLAN tag 4011 # (del cmd) @@ -212,7 +212,7 @@ ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PRO # ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ - prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push # Now the namespaces can reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 @@ -226,7 +226,7 @@ ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 # export XDP_PROG=xdp_vlan_remove_outer2 ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # Now the namespaces should still be able reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 9c4be2cdb21a..09a16a77bae4 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -23,7 +23,7 @@ static int ksym_cmp(const void *p1, const void *p2) return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; } -int load_kallsyms(void) +int load_kallsyms_refresh(void) { FILE *f; char func[256], buf[256]; @@ -31,12 +31,7 @@ int load_kallsyms(void) void *addr; int i = 0; - /* - * This is called/used from multiplace places, - * load symbols just once. - */ - if (sym_cnt) - return 0; + sym_cnt = 0; f = fopen("/proc/kallsyms", "r"); if (!f) @@ -57,6 +52,17 @@ int load_kallsyms(void) return 0; } +int load_kallsyms(void) +{ + /* + * This is called/used from multiplace places, + * load symbols just once. + */ + if (sym_cnt) + return 0; + return load_kallsyms_refresh(); +} + struct ksym *ksym_search(long key) { int start = 0, end = sym_cnt; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 238a9c98cde2..53efde0e2998 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -10,6 +10,8 @@ struct ksym { }; int load_kallsyms(void); +int load_kallsyms_refresh(void); + struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index e1a937277b54..9d993926bf0e 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -76,7 +76,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 expected pointer to ctx, but got PTR", + .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_pass_ctx", 2 }, }, @@ -109,7 +109,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 3 }, { "bpf_kfunc_call_test_release", 5 }, @@ -2305,3 +2305,85 @@ .errstr = "!read_ok", .result = REJECT, }, +/* Make sure that verifier.c:states_equal() considers IDs from all + * frames when building 'idmap' for check_ids(). + */ +{ + "calls: check_ids() across call boundary", + .insns = { + /* Function main() */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* fp[-24] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -24), + /* fp[-32] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -32), + /* call foo(&fp[-24], &fp[-32]) ; both arguments have IDs in the current + * ; stack frame + */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -24), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32), + BPF_CALL_REL(2), + /* exit 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* Function foo() + * + * r9 = &frame[0].fp[-24] ; save arguments in the callee saved registers, + * r8 = &frame[0].fp[-32] ; arguments are pointers to pointers to map value + */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_2), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if r6 > r7 goto +1 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * r9 = r8 + */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), + /* r9 = *r9 ; verifier get's to this point via two paths: + * ; (I) one including r9 = r8, verified first; + * ; (II) one excluding r9 = r8, verified next. + * ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id. + * ; Suppose that checkpoint is created here via path (I). + * ; When verifying via (II) the r9.id must be compared against + * ; frame[0].fp[-24].id, otherwise (I) and (II) would be + * ; incorrectly deemed equivalent. + * if r9 == 0 goto <exit> + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1), + /* r8 = *r8 ; read map value via r8, this is not safe + * r0 = *r8 ; because r8 might be not equal to r9. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0), + /* exit 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .fixup_map_hash_8b = { 3, 9 }, + .result = REJECT, + .errstr = "R8 invalid mem access 'map_value_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index 11acd1855acf..dce2e28aeb43 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -654,3 +654,57 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "direct packet access: test30 (check_id() in regsafe(), bad access)", + .insns = { + /* r9 = ctx */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r2 = ctx->data + * r3 = ctx->data + * r4 = ctx->data_end + */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_9, offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_9, offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_9, offsetof(struct __sk_buff, data_end)), + /* if r6 > 100 goto exit + * if r7 > 100 goto exit + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 100, 9), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 100, 8), + /* r2 += r6 ; this forces assignment of ID to r2 + * r2 += 1 ; get some fixed off for r2 + * r3 += r7 ; this forces assignment of ID to r3 + * r3 += 1 ; get some fixed off for r3 + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1), + /* if r6 > r7 goto +1 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * r2 = r3 ; optionally share ID between r2 and r3 + */ + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), + /* if r3 > ctx->data_end goto exit */ + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 1), + /* r5 = *(u8 *) (r2 - 1) ; access packet memory using r2, + * ; this is not always safe + */ + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, -1), + /* exit(0) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .result = REJECT, + .errstr = "invalid access to packet, off=0 size=1, R2", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c new file mode 100644 index 000000000000..67a1c07ead34 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c @@ -0,0 +1,174 @@ +{ + /* This is equivalent to the following program: + * + * r6 = skb->sk; + * r7 = sk_fullsock(r6); + * r0 = sk_fullsock(r6); + * if (r0 == 0) return 0; (a) + * if (r0 != r7) return 0; (b) + * *r7->type; (c) + * return 0; + * + * It is safe to dereference r7 at point (c), because of (a) and (b). + * The test verifies that relation r0 == r7 is propagated from (b) to (c). + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JNE false branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* if (r0 == r7) r0 = *(r7->type); */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as above, but verify that another branch of JNE still + * prohibits access to PTR_MAYBE_NULL. + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JNE true branch", + .insns = { + /* r6 = skb->sk */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + /* if (r0 == r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */ + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + /* r0 = *(r7->type); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "R7 invalid mem access 'sock_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as a first test, but not null should be inferred for JEQ branch */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JEQ true branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + /* if (r0 != r7) return 0; */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */ + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + /* r0 = *(r7->type); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as above, but verify that another branch of JNE still + * prohibits access to PTR_MAYBE_NULL. + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JEQ false branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* if (r0 != r7) r0 = *(r7->type); */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "R7 invalid mem access 'sock_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Maps are treated in a different branch of `mark_ptr_not_null_reg`, + * so separate test for maps case. + */ + "jne/jeq infer not null, PTR_TO_MAP_VALUE_OR_NULL -> PTR_TO_MAP_VALUE", + .insns = { + /* r9 = &some stack to use as key */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_9, -8), + /* r8 = process local map */ + BPF_LD_MAP_FD(BPF_REG_8, 0), + /* r6 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r7 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2), + /* if (r6 != r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1), + /* read *r7; */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_xdp_sock, queue_id)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_xskmap = { 3 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index 79021c30e51e..8bf37e5207f1 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -21,6 +21,30 @@ .retval = 2, }, { + "jit: lsh, rsh, arsh by reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_1, 0xff), + BPF_ALU64_REG(BPF_LSH, BPF_REG_1, BPF_REG_0), + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_RSH, BPF_REG_1, BPF_REG_4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_1), + BPF_ALU32_REG(BPF_RSH, BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xff, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ARSH, BPF_REG_4, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ "jit: mov32 for ldimm64, 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 2), diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index 1f82021429bf..17ee84dc7766 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -9,7 +9,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "R1 is bpf_array invalid negative access: off=-8", }, @@ -26,7 +26,7 @@ }, .fixup_map_array_48b = { 3 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "only read from bpf_array is supported", }, @@ -41,7 +41,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -57,7 +57,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = ACCEPT, .retval = 1, }, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index fd683a32a276..9540164712b7 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -142,7 +142,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_user_key", 2 }, { "bpf_key_put", 4 }, @@ -163,7 +163,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_system_key", 1 }, { "bpf_key_put", 3 }, diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c index b64d33e4833c..92e3f6a61a79 100644 --- a/tools/testing/selftests/bpf/verifier/ringbuf.c +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -28,7 +28,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "dereference of modified alloc_mem ptr R1", + .errstr = "R1 must have zero offset when passed to release func", }, { "ringbuf: invalid reservation offset 2", diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index e23f07175e1b..9bb302dade23 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -84,7 +84,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", + .errstr = "R0 pointer arithmetic on ringbuf_mem_or_null prohibited", }, { "check corrupted spill/fill", diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c index 781621facae4..eaf114f07e2e 100644 --- a/tools/testing/selftests/bpf/verifier/spin_lock.c +++ b/tools/testing/selftests/bpf/verifier/spin_lock.c @@ -331,3 +331,117 @@ .errstr = "inside bpf_spin_lock", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "spin_lock: regsafe compare reg->id for map value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_6, offsetof(struct __sk_buff, mark)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 2 }, + .result = REJECT, + .errstr = "bpf_spin_unlock of different lock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = BPF_F_TEST_STATE_FREQ, +}, +/* Make sure that regsafe() compares ids for spin lock records using + * check_ids(): + * 1: r9 = map_lookup_elem(...) ; r9.id == 1 + * 2: r8 = map_lookup_elem(...) ; r8.id == 2 + * 3: r7 = ktime_get_ns() + * 4: r6 = ktime_get_ns() + * 5: if r6 > r7 goto <9> + * 6: spin_lock(r8) + * 7: r9 = r8 + * 8: goto <10> + * 9: spin_lock(r9) + * 10: spin_unlock(r9) ; r9.id == 1 || r9.id == 2 and lock is active, + * ; second visit to (10) should be considered safe + * ; if check_ids() is used. + * 11: exit(0) + */ +{ + "spin_lock: regsafe() check_ids() similar id mappings", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + /* r9 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 24), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + /* r8 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 18), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if r6 > r7 goto +5 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * spin_lock(r8) + * r9 = r8 + * goto unlock + */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), + BPF_JMP_A(3), + /* spin_lock(r9) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + /* spin_unlock(r9) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_unlock), + /* exit(0) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3, 10 }, + .result = VERBOSE_ACCEPT, + .errstr = "28: safe", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = BPF_F_TEST_STATE_FREQ, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c index 3ecb70a3d939..52a8bca14f03 100644 --- a/tools/testing/selftests/bpf/verifier/value_or_null.c +++ b/tools/testing/selftests/bpf/verifier/value_or_null.c @@ -169,3 +169,52 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, +{ + "MAP_VALUE_OR_NULL check_ids() in regsafe()", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* r9 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + /* r8 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if r6 > r7 goto +1 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * r9 = r8 ; optionally share ID between r9 and r8 + */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), + /* if r9 == 0 goto <exit> */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1), + /* read map value via r8, this is not always + * safe because r8 might be not equal to r9. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0), + /* exit 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .fixup_map_hash_8b = { 3, 9 }, + .result = REJECT, + .errstr = "R8 invalid mem access 'map_value_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b0d83a28e348..f961b49b8ef4 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -17,6 +17,7 @@ #include <bpf/libbpf.h> #include <libelf.h> #include <gelf.h> +#include <float.h> enum stat_id { VERDICT, @@ -34,6 +35,45 @@ enum stat_id { NUM_STATS_CNT = FILE_NAME - VERDICT, }; +/* In comparison mode each stat can specify up to four different values: + * - A side value; + * - B side value; + * - absolute diff value; + * - relative (percentage) diff value. + * + * When specifying stat specs in comparison mode, user can use one of the + * following variant suffixes to specify which exact variant should be used for + * ordering or filtering: + * - `_a` for A side value; + * - `_b` for B side value; + * - `_diff` for absolute diff value; + * - `_pct` for relative (percentage) diff value. + * + * If no variant suffix is provided, then `_b` (control data) is assumed. + * + * As an example, let's say instructions stat has the following output: + * + * Insns (A) Insns (B) Insns (DIFF) + * --------- --------- -------------- + * 21547 20920 -627 (-2.91%) + * + * Then: + * - 21547 is A side value (insns_a); + * - 20920 is B side value (insns_b); + * - -627 is absolute diff value (insns_diff); + * - -2.91% is relative diff value (insns_pct). + * + * For verdict there is no verdict_pct variant. + * For file and program name, _a and _b variants are equivalent and there are + * no _diff or _pct variants. + */ +enum stat_variant { + VARIANT_A, + VARIANT_B, + VARIANT_DIFF, + VARIANT_PCT, +}; + struct verif_stats { char *file_name; char *prog_name; @@ -41,9 +81,19 @@ struct verif_stats { long stats[NUM_STATS_CNT]; }; +/* joined comparison mode stats */ +struct verif_stats_join { + char *file_name; + char *prog_name; + + const struct verif_stats *stats_a; + const struct verif_stats *stats_b; +}; + struct stat_specs { int spec_cnt; enum stat_id ids[ALL_STATS_CNT]; + enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -54,9 +104,31 @@ enum resfmt { RESFMT_CSV, }; +enum filter_kind { + FILTER_NAME, + FILTER_STAT, +}; + +enum operator_kind { + OP_EQ, /* == or = */ + OP_NEQ, /* != or <> */ + OP_LT, /* < */ + OP_LE, /* <= */ + OP_GT, /* > */ + OP_GE, /* >= */ +}; + struct filter { + enum filter_kind kind; + /* FILTER_NAME */ + char *any_glob; char *file_glob; char *prog_glob; + /* FILTER_STAT */ + enum operator_kind op; + int stat_id; + enum stat_variant stat_var; + long value; }; static struct env { @@ -67,6 +139,7 @@ static struct env { int log_level; enum resfmt out_fmt; bool comparison_mode; + bool replay_mode; struct verif_stats *prog_stats; int prog_stat_cnt; @@ -75,6 +148,9 @@ static struct env { struct verif_stats *baseline_stats; int baseline_stat_cnt; + struct verif_stats_join *join_stats; + int join_stat_cnt; + struct stat_specs output_spec; struct stat_specs sort_spec; @@ -115,6 +191,7 @@ static const struct argp_option opts[] = { { "sort", 's', "SPEC", 0, "Specify sort order" }, { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." }, { "compare", 'C', NULL, 0, "Comparison mode" }, + { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, {}, }; @@ -169,6 +246,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'C': env.comparison_mode = true; break; + case 'R': + env.replay_mode = true; + break; case 'f': if (arg[0] == '@') err = append_filter_file(arg + 1); @@ -226,28 +306,6 @@ static bool glob_matches(const char *str, const char *pat) return !*str && !*pat; } -static bool should_process_file(const char *filename) -{ - int i; - - if (env.deny_filter_cnt > 0) { - for (i = 0; i < env.deny_filter_cnt; i++) { - if (glob_matches(filename, env.deny_filters[i].file_glob)) - return false; - } - } - - if (env.allow_filter_cnt == 0) - return true; - - for (i = 0; i < env.allow_filter_cnt; i++) { - if (glob_matches(filename, env.allow_filters[i].file_glob)) - return true; - } - - return false; -} - static bool is_bpf_obj_file(const char *path) { Elf64_Ehdr *ehdr; int fd, err = -EINVAL; @@ -280,45 +338,84 @@ cleanup: return err == 0; } -static bool should_process_prog(const char *path, const char *prog_name) +static bool should_process_file_prog(const char *filename, const char *prog_name) { - const char *filename = basename(path); - int i; + struct filter *f; + int i, allow_cnt = 0; - if (env.deny_filter_cnt > 0) { - for (i = 0; i < env.deny_filter_cnt; i++) { - if (glob_matches(filename, env.deny_filters[i].file_glob)) - return false; - if (!env.deny_filters[i].prog_glob) - continue; - if (glob_matches(prog_name, env.deny_filters[i].prog_glob)) - return false; - } - } + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_NAME) + continue; - if (env.allow_filter_cnt == 0) - return true; + if (f->any_glob && glob_matches(filename, f->any_glob)) + return false; + if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob)) + return false; + if (f->file_glob && glob_matches(filename, f->file_glob)) + return false; + if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob)) + return false; + } for (i = 0; i < env.allow_filter_cnt; i++) { - if (!glob_matches(filename, env.allow_filters[i].file_glob)) + f = &env.allow_filters[i]; + if (f->kind != FILTER_NAME) continue; - /* if filter specifies only filename glob part, it implicitly - * allows all progs within that file - */ - if (!env.allow_filters[i].prog_glob) - return true; - if (glob_matches(prog_name, env.allow_filters[i].prog_glob)) + + allow_cnt++; + if (f->any_glob) { + if (glob_matches(filename, f->any_glob)) + return true; + /* If we don't know program name yet, any_glob filter + * has to assume that current BPF object file might be + * relevant; we'll check again later on after opening + * BPF object file, at which point program name will + * be known finally. + */ + if (!prog_name || glob_matches(prog_name, f->any_glob)) + return true; + } else { + if (f->file_glob && !glob_matches(filename, f->file_glob)) + continue; + if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob)) + continue; return true; + } } - return false; + /* if there are no file/prog name allow filters, allow all progs, + * unless they are denied earlier explicitly + */ + return allow_cnt == 0; } +static struct { + enum operator_kind op_kind; + const char *op_str; +} operators[] = { + /* Order of these definitions matter to avoid situations like '<' + * matching part of what is actually a '<>' operator. That is, + * substrings should go last. + */ + { OP_EQ, "==" }, + { OP_NEQ, "!=" }, + { OP_NEQ, "<>" }, + { OP_LE, "<=" }, + { OP_LT, "<" }, + { OP_GE, ">=" }, + { OP_GT, ">" }, + { OP_EQ, "=" }, +}; + +static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); + static int append_filter(struct filter **filters, int *cnt, const char *str) { struct filter *f; void *tmp; const char *p; + int i; tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters)); if (!tmp) @@ -326,26 +423,108 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) *filters = tmp; f = &(*filters)[*cnt]; - f->file_glob = f->prog_glob = NULL; + memset(f, 0, sizeof(*f)); + + /* First, let's check if it's a stats filter of the following form: + * <stat><op><value, where: + * - <stat> is one of supported numerical stats (verdict is also + * considered numerical, failure == 0, success == 1); + * - <op> is comparison operator (see `operators` definitions); + * - <value> is an integer (or failure/success, or false/true as + * special aliases for 0 and 1, respectively). + * If the form doesn't match what user provided, we assume file/prog + * glob filter. + */ + for (i = 0; i < ARRAY_SIZE(operators); i++) { + enum stat_variant var; + int id; + long val; + const char *end = str; + const char *op_str; + + op_str = operators[i].op_str; + p = strstr(str, op_str); + if (!p) + continue; + + if (!parse_stat_id_var(str, p - str, &id, &var)) { + fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); + return -EINVAL; + } + if (id >= FILE_NAME) { + fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str); + return -EINVAL; + } + + p += strlen(op_str); + + if (strcasecmp(p, "true") == 0 || + strcasecmp(p, "t") == 0 || + strcasecmp(p, "success") == 0 || + strcasecmp(p, "succ") == 0 || + strcasecmp(p, "s") == 0 || + strcasecmp(p, "match") == 0 || + strcasecmp(p, "m") == 0) { + val = 1; + } else if (strcasecmp(p, "false") == 0 || + strcasecmp(p, "f") == 0 || + strcasecmp(p, "failure") == 0 || + strcasecmp(p, "fail") == 0 || + strcasecmp(p, "mismatch") == 0 || + strcasecmp(p, "mis") == 0) { + val = 0; + } else { + errno = 0; + val = strtol(p, (char **)&end, 10); + if (errno || end == p || *end != '\0' ) { + fprintf(stderr, "Invalid integer value in '%s'!\n", str); + return -EINVAL; + } + } + + f->kind = FILTER_STAT; + f->stat_id = id; + f->stat_var = var; + f->op = operators[i].op_kind; + f->value = val; + + *cnt += 1; + return 0; + } - /* filter can be specified either as "<obj-glob>" or "<obj-glob>/<prog-glob>" */ + /* File/prog filter can be specified either as '<glob>' or + * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to + * both file and program names. This seems to be way more useful in + * practice. If user needs full control, they can use '/<prog-glob>' + * form to glob just program name, or '<file-glob>/' to glob only file + * name. But usually common <glob> seems to be the most useful and + * ergonomic way. + */ + f->kind = FILTER_NAME; p = strchr(str, '/'); if (!p) { - f->file_glob = strdup(str); - if (!f->file_glob) + f->any_glob = strdup(str); + if (!f->any_glob) return -ENOMEM; } else { - f->file_glob = strndup(str, p - str); - f->prog_glob = strdup(p + 1); - if (!f->file_glob || !f->prog_glob) { - free(f->file_glob); - free(f->prog_glob); - f->file_glob = f->prog_glob = NULL; - return -ENOMEM; + if (str != p) { + /* non-empty file glob */ + f->file_glob = strndup(str, p - str); + if (!f->file_glob) + return -ENOMEM; + } + if (strlen(p + 1) > 0) { + /* non-empty prog glob */ + f->prog_glob = strdup(p + 1); + if (!f->prog_glob) { + free(f->file_glob); + f->file_glob = NULL; + return -ENOMEM; + } } } - *cnt = *cnt + 1; + *cnt += 1; return 0; } @@ -388,6 +567,15 @@ static const struct stat_specs default_output_spec = { }, }; +static const struct stat_specs default_csv_output_spec = { + .spec_cnt = 9, + .ids = { + FILE_NAME, PROG_NAME, VERDICT, DURATION, + TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, + MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, + }, +}; + static const struct stat_specs default_sort_spec = { .spec_cnt = 2, .ids = { @@ -396,48 +584,123 @@ static const struct stat_specs default_sort_spec = { .asc = { true, true, }, }; +/* sorting for comparison mode to join two data sets */ +static const struct stat_specs join_sort_spec = { + .spec_cnt = 2, + .ids = { + FILE_NAME, PROG_NAME, + }, + .asc = { true, true, }, +}; + static struct stat_def { const char *header; const char *names[4]; bool asc_by_default; + bool left_aligned; } stat_defs[] = { - [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ }, - [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ }, - [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ }, + [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ }, + [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ }, + [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ }, [DURATION] = { "Duration (us)", {"duration", "dur"}, }, - [TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, }, - [TOTAL_STATES] = { "Total states", {"total_states", "states"}, }, + [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, }, + [TOTAL_STATES] = { "States", {"total_states", "states"}, }, [PEAK_STATES] = { "Peak states", {"peak_states"}, }, [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, }, [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; +static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +{ + static const char *var_sfxs[] = { + [VARIANT_A] = "_a", + [VARIANT_B] = "_b", + [VARIANT_DIFF] = "_diff", + [VARIANT_PCT] = "_pct", + }; + int i, j, k; + + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { + struct stat_def *def = &stat_defs[i]; + size_t alias_len, sfx_len; + const char *alias; + + for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) { + alias = def->names[j]; + if (!alias) + continue; + + alias_len = strlen(alias); + if (strncmp(name, alias, alias_len) != 0) + continue; + + if (alias_len == len) { + /* If no variant suffix is specified, we + * assume control group (just in case we are + * in comparison mode. Variant is ignored in + * non-comparison mode. + */ + *var = VARIANT_B; + *id = i; + return true; + } + + for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) { + sfx_len = strlen(var_sfxs[k]); + if (alias_len + sfx_len != len) + continue; + + if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) { + *var = (enum stat_variant)k; + *id = i; + return true; + } + } + } + } + + return false; +} + +static bool is_asc_sym(char c) +{ + return c == '^'; +} + +static bool is_desc_sym(char c) +{ + return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_'; +} + static int parse_stat(const char *stat_name, struct stat_specs *specs) { - int id, i; + int id; + bool has_order = false, is_asc = false; + size_t len = strlen(stat_name); + enum stat_variant var; if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) { fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids)); return -E2BIG; } - for (id = 0; id < ARRAY_SIZE(stat_defs); id++) { - struct stat_def *def = &stat_defs[id]; - - for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) { - if (!def->names[i] || strcmp(def->names[i], stat_name) != 0) - continue; - - specs->ids[specs->spec_cnt] = id; - specs->asc[specs->spec_cnt] = def->asc_by_default; - specs->spec_cnt++; + if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) { + has_order = true; + is_asc = is_asc_sym(stat_name[len - 1]); + len -= 1; + } - return 0; - } + if (!parse_stat_id_var(stat_name, len, &id, &var)) { + fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); + return -ESRCH; } - fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); - return -ESRCH; + specs->ids[specs->spec_cnt] = id; + specs->variants[specs->spec_cnt] = var; + specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->spec_cnt++; + + return 0; } static int parse_stats(const char *stats_str, struct stat_specs *specs) @@ -509,6 +772,28 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } +static void fixup_obj(struct bpf_object *obj) +{ + struct bpf_map *map; + + bpf_object__for_each_map(map, obj) { + /* disable pinning */ + bpf_map__set_pin_path(map, NULL); + + /* fix up map size, if necessary */ + switch (bpf_map__type(map)) { + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_CGROUP_STORAGE: + break; + default: + if (bpf_map__max_entries(map) == 0) + bpf_map__set_max_entries(map, 1); + } + } +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *prog_name = bpf_program__name(prog); @@ -518,7 +803,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf int err = 0; void *tmp; - if (!should_process_prog(filename, bpf_program__name(prog))) { + if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) { env.progs_skipped++; return 0; } @@ -543,6 +828,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf } verif_log_buf[0] = '\0'; + /* increase chances of successful BPF object loading */ + fixup_obj(obj); + err = bpf_object__load(obj); env.progs_processed++; @@ -571,7 +859,7 @@ static int process_obj(const char *filename) LIBBPF_OPTS(bpf_object_open_opts, opts); int err = 0, prog_cnt = 0; - if (!should_process_file(basename(filename))) { + if (!should_process_file_prog(basename(filename), NULL)) { if (env.verbose) printf("Skipping '%s' due to filters...\n", filename); env.files_skipped++; @@ -691,7 +979,106 @@ static int cmp_prog_stats(const void *v1, const void *v2) return cmp; } - return 0; + /* always disambiguate with file+prog, which are unique */ + cmp = strcmp(s1->file_name, s2->file_name); + if (cmp != 0) + return cmp; + return strcmp(s1->prog_name, s2->prog_name); +} + +static void fetch_join_stat_value(const struct verif_stats_join *s, + enum stat_id id, enum stat_variant var, + const char **str_val, + double *num_val) +{ + long v1, v2; + + if (id == FILE_NAME) { + *str_val = s->file_name; + return; + } + if (id == PROG_NAME) { + *str_val = s->prog_name; + return; + } + + v1 = s->stats_a ? s->stats_a->stats[id] : 0; + v2 = s->stats_b ? s->stats_b->stats[id] : 0; + + switch (var) { + case VARIANT_A: + if (!s->stats_a) + *num_val = -DBL_MAX; + else + *num_val = s->stats_a->stats[id]; + return; + case VARIANT_B: + if (!s->stats_b) + *num_val = -DBL_MAX; + else + *num_val = s->stats_b->stats[id]; + return; + case VARIANT_DIFF: + if (!s->stats_a || !s->stats_b) + *num_val = -DBL_MAX; + else if (id == VERDICT) + *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */; + else + *num_val = (double)(v2 - v1); + return; + case VARIANT_PCT: + if (!s->stats_a || !s->stats_b) { + *num_val = -DBL_MAX; + } else if (v1 == 0) { + if (v1 == v2) + *num_val = 0.0; + else + *num_val = v2 < v1 ? -100.0 : 100.0; + } else { + *num_val = (v2 - v1) * 100.0 / v1; + } + return; + } +} + +static int cmp_join_stat(const struct verif_stats_join *s1, + const struct verif_stats_join *s2, + enum stat_id id, enum stat_variant var, bool asc) +{ + const char *str1 = NULL, *str2 = NULL; + double v1, v2; + int cmp = 0; + + fetch_join_stat_value(s1, id, var, &str1, &v1); + fetch_join_stat_value(s2, id, var, &str2, &v2); + + if (str1) + cmp = strcmp(str1, str2); + else if (v1 != v2) + cmp = v1 < v2 ? -1 : 1; + + return asc ? cmp : -cmp; +} + +static int cmp_join_stats(const void *v1, const void *v2) +{ + const struct verif_stats_join *s1 = v1, *s2 = v2; + int i, cmp; + + for (i = 0; i < env.sort_spec.spec_cnt; i++) { + cmp = cmp_join_stat(s1, s2, + env.sort_spec.ids[i], + env.sort_spec.variants[i], + env.sort_spec.asc[i]); + if (cmp != 0) + return cmp; + } + + /* always disambiguate with file+prog, which are unique */ + cmp = strcmp(s1->file_name, s2->file_name); + if (cmp != 0) + return cmp; + return strcmp(s1->prog_name, s2->prog_name); } #define HEADER_CHAR '-' @@ -713,6 +1100,7 @@ static void output_header_underlines(void) static void output_headers(enum resfmt fmt) { + const char *fmt_str; int i, len; for (i = 0; i < env.output_spec.spec_cnt; i++) { @@ -726,7 +1114,8 @@ static void output_headers(enum resfmt fmt) *max_len = len; break; case RESFMT_TABLE: - printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); + fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s"; + printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); if (i == env.output_spec.spec_cnt - 1) printf("\n"); break; @@ -747,13 +1136,16 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, { switch (id) { case FILE_NAME: - *str = s->file_name; + *str = s ? s->file_name : "N/A"; break; case PROG_NAME: - *str = s->prog_name; + *str = s ? s->prog_name : "N/A"; break; case VERDICT: - *str = s->stats[VERDICT] ? "success" : "failure"; + if (!s) + *str = "N/A"; + else + *str = s->stats[VERDICT] ? "success" : "failure"; break; case DURATION: case TOTAL_INSNS: @@ -761,7 +1153,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case PEAK_STATES: case MAX_STATES_PER_INSN: case MARK_READ_MAX_LEN: - *val = s->stats[id]; + *val = s ? s->stats[id] : 0; break; default: fprintf(stderr, "Unrecognized stat #%d\n", id); @@ -816,42 +1208,6 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last } } -static int handle_verif_mode(void) -{ - int i, err; - - if (env.filename_cnt == 0) { - fprintf(stderr, "Please provide path to BPF object file!\n"); - argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); - return -EINVAL; - } - - for (i = 0; i < env.filename_cnt; i++) { - err = process_obj(env.filenames[i]); - if (err) { - fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; - } - } - - qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); - - if (env.out_fmt == RESFMT_TABLE) { - /* calculate column widths */ - output_headers(RESFMT_TABLE_CALCLEN); - for (i = 0; i < env.prog_stat_cnt; i++) - output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false); - } - - /* actually output the table */ - output_headers(env.out_fmt); - for (i = 0; i < env.prog_stat_cnt; i++) { - output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1); - } - - return 0; -} - static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st) { switch (id) { @@ -983,7 +1339,7 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs, * parsed entire line; if row should be ignored we pretend we * never parsed it */ - if (!should_process_prog(st->file_name, st->prog_name)) { + if (!should_process_file_prog(st->file_name, st->prog_name)) { free(st->file_name); free(st->prog_name); *stat_cntp -= 1; @@ -1072,9 +1428,11 @@ static void output_comp_headers(enum resfmt fmt) output_comp_header_underlines(); } -static void output_comp_stats(const struct verif_stats *base, const struct verif_stats *comp, +static void output_comp_stats(const struct verif_stats_join *join_stats, enum resfmt fmt, bool last) { + const struct verif_stats *base = join_stats->stats_a; + const struct verif_stats *comp = join_stats->stats_b; char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {}; int i; @@ -1092,28 +1450,44 @@ static void output_comp_stats(const struct verif_stats *base, const struct verif /* normalize all the outputs to be in string buffers for simplicity */ if (is_key_stat(id)) { /* key stats (file and program name) are always strings */ - if (base != &fallback_stats) + if (base) snprintf(base_buf, sizeof(base_buf), "%s", base_str); else snprintf(base_buf, sizeof(base_buf), "%s", comp_str); } else if (base_str) { snprintf(base_buf, sizeof(base_buf), "%s", base_str); snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str); - if (strcmp(base_str, comp_str) == 0) + if (!base || !comp) + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); + else if (strcmp(base_str, comp_str) == 0) snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH"); else snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH"); } else { - snprintf(base_buf, sizeof(base_buf), "%ld", base_val); - snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); + double p = 0.0; + + if (base) + snprintf(base_buf, sizeof(base_buf), "%ld", base_val); + else + snprintf(base_buf, sizeof(base_buf), "%s", "N/A"); + if (comp) + snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); + else + snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A"); diff_val = comp_val - base_val; - if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) { - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", - diff_val, comp_val < base_val ? -100.0 : 100.0); + if (!base || !comp) { + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); } else { - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", - diff_val, diff_val * 100.0 / base_val); + if (base_val == 0) { + if (comp_val == base_val) + p = 0.0; /* avoid +0 (+100%) case */ + else + p = comp_val < base_val ? -100.0 : 100.0; + } else { + p = diff_val * 100.0 / base_val; + } + snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p); } } @@ -1170,14 +1544,64 @@ static int cmp_stats_key(const struct verif_stats *base, const struct verif_stat return strcmp(base->prog_name, comp->prog_name); } +static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats) +{ + static const double eps = 1e-9; + const char *str = NULL; + double value = 0.0; + + fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + + switch (f->op) { + case OP_EQ: return value > f->value - eps && value < f->value + eps; + case OP_NEQ: return value < f->value - eps || value > f->value + eps; + case OP_LT: return value < f->value - eps; + case OP_LE: return value <= f->value + eps; + case OP_GT: return value > f->value + eps; + case OP_GE: return value >= f->value - eps; + } + + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); + return false; +} + +static bool should_output_join_stats(const struct verif_stats_join *stats) +{ + struct filter *f; + int i, allow_cnt = 0; + + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_STAT) + continue; + + if (is_join_stat_filter_matched(f, stats)) + return false; + } + + for (i = 0; i < env.allow_filter_cnt; i++) { + f = &env.allow_filters[i]; + if (f->kind != FILTER_STAT) + continue; + allow_cnt++; + + if (is_join_stat_filter_matched(f, stats)) + return true; + } + + /* if there are no stat allowed filters, pass everything through */ + return allow_cnt == 0; +} + static int handle_comparison_mode(void) { struct stat_specs base_specs = {}, comp_specs = {}; + struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j; + int err, i, j, last_idx; if (env.filename_cnt != 2) { - fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n"); + fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); return -EINVAL; } @@ -1215,31 +1639,26 @@ static int handle_comparison_mode(void) } } + /* Replace user-specified sorting spec with file+prog sorting rule to + * be able to join two datasets correctly. Once we are done, we will + * restore the original sort spec. + */ + tmp_sort_spec = env.sort_spec; + env.sort_spec = join_sort_spec; qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats); + env.sort_spec = tmp_sort_spec; - /* for human-readable table output we need to do extra pass to - * calculate column widths, so we substitute current output format - * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE - * and do everything again. - */ - if (env.out_fmt == RESFMT_TABLE) - cur_fmt = RESFMT_TABLE_CALCLEN; - else - cur_fmt = env.out_fmt; - -one_more_time: - output_comp_headers(cur_fmt); - - /* If baseline and comparison datasets have different subset of rows - * (we match by 'object + prog' as a unique key) then assume - * empty/missing/zero value for rows that are missing in the opposite - * data set + /* Join two datasets together. If baseline and comparison datasets + * have different subset of rows (we match by 'object + prog' as + * a unique key) then assume empty/missing/zero value for rows that + * are missing in the opposite data set. */ i = j = 0; while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) { - bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1); const struct verif_stats *base, *comp; + struct verif_stats_join *join; + void *tmp; int r; base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats; @@ -1256,18 +1675,64 @@ one_more_time: return -EINVAL; } + tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats)); + if (!tmp) + return -ENOMEM; + env.join_stats = tmp; + + join = &env.join_stats[env.join_stat_cnt]; + memset(join, 0, sizeof(*join)); + r = cmp_stats_key(base, comp); if (r == 0) { - output_comp_stats(base, comp, cur_fmt, last); + join->file_name = base->file_name; + join->prog_name = base->prog_name; + join->stats_a = base; + join->stats_b = comp; i++; j++; } else if (comp == &fallback_stats || r < 0) { - output_comp_stats(base, &fallback_stats, cur_fmt, last); + join->file_name = base->file_name; + join->prog_name = base->prog_name; + join->stats_a = base; + join->stats_b = NULL; i++; } else { - output_comp_stats(&fallback_stats, comp, cur_fmt, last); + join->file_name = comp->file_name; + join->prog_name = comp->prog_name; + join->stats_a = NULL; + join->stats_b = comp; j++; } + env.join_stat_cnt += 1; + } + + /* now sort joined results accorsing to sort spec */ + qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); + + /* for human-readable table output we need to do extra pass to + * calculate column widths, so we substitute current output format + * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE + * and do everything again. + */ + if (env.out_fmt == RESFMT_TABLE) + cur_fmt = RESFMT_TABLE_CALCLEN; + else + cur_fmt = env.out_fmt; + +one_more_time: + output_comp_headers(cur_fmt); + + for (i = 0; i < env.join_stat_cnt; i++) { + const struct verif_stats_join *join = &env.join_stats[i]; + + if (!should_output_join_stats(join)) + continue; + + if (cur_fmt == RESFMT_TABLE_CALCLEN) + last_idx = i; + + output_comp_stats(join, cur_fmt, i == last_idx); } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1278,6 +1743,128 @@ one_more_time: return 0; } +static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats) +{ + long value = stats->stats[f->stat_id]; + + switch (f->op) { + case OP_EQ: return value == f->value; + case OP_NEQ: return value != f->value; + case OP_LT: return value < f->value; + case OP_LE: return value <= f->value; + case OP_GT: return value > f->value; + case OP_GE: return value >= f->value; + } + + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); + return false; +} + +static bool should_output_stats(const struct verif_stats *stats) +{ + struct filter *f; + int i, allow_cnt = 0; + + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_STAT) + continue; + + if (is_stat_filter_matched(f, stats)) + return false; + } + + for (i = 0; i < env.allow_filter_cnt; i++) { + f = &env.allow_filters[i]; + if (f->kind != FILTER_STAT) + continue; + allow_cnt++; + + if (is_stat_filter_matched(f, stats)) + return true; + } + + /* if there are no stat allowed filters, pass everything through */ + return allow_cnt == 0; +} + +static void output_prog_stats(void) +{ + const struct verif_stats *stats; + int i, last_stat_idx = 0; + + if (env.out_fmt == RESFMT_TABLE) { + /* calculate column widths */ + output_headers(RESFMT_TABLE_CALCLEN); + for (i = 0; i < env.prog_stat_cnt; i++) { + stats = &env.prog_stats[i]; + if (!should_output_stats(stats)) + continue; + output_stats(stats, RESFMT_TABLE_CALCLEN, false); + last_stat_idx = i; + } + } + + /* actually output the table */ + output_headers(env.out_fmt); + for (i = 0; i < env.prog_stat_cnt; i++) { + stats = &env.prog_stats[i]; + if (!should_output_stats(stats)) + continue; + output_stats(stats, env.out_fmt, i == last_stat_idx); + } +} + +static int handle_verif_mode(void) +{ + int i, err; + + if (env.filename_cnt == 0) { + fprintf(stderr, "Please provide path to BPF object file!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + for (i = 0; i < env.filename_cnt; i++) { + err = process_obj(env.filenames[i]); + if (err) { + fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); + return err; + } + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + + output_prog_stats(); + + return 0; +} + +static int handle_replay_mode(void) +{ + struct stat_specs specs = {}; + int err; + + if (env.filename_cnt != 1) { + fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + err = parse_stats_csv(env.filenames[0], &specs, + &env.prog_stats, &env.prog_stat_cnt); + if (err) { + fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err); + return err; + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + + output_prog_stats(); + + return 0; +} + int main(int argc, char **argv) { int err = 0, i; @@ -1286,34 +1873,49 @@ int main(int argc, char **argv) return 1; if (env.verbose && env.quiet) { - fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n"); + fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n"); argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); return 1; } if (env.verbose && env.log_level == 0) env.log_level = 1; - if (env.output_spec.spec_cnt == 0) - env.output_spec = default_output_spec; + if (env.output_spec.spec_cnt == 0) { + if (env.out_fmt == RESFMT_CSV) + env.output_spec = default_csv_output_spec; + else + env.output_spec = default_output_spec; + } if (env.sort_spec.spec_cnt == 0) env.sort_spec = default_sort_spec; + if (env.comparison_mode && env.replay_mode) { + fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return 1; + } + if (env.comparison_mode) err = handle_comparison_mode(); + else if (env.replay_mode) + err = handle_replay_mode(); else err = handle_verif_mode(); free_verif_stats(env.prog_stats, env.prog_stat_cnt); free_verif_stats(env.baseline_stats, env.baseline_stat_cnt); + free(env.join_stats); for (i = 0; i < env.filename_cnt; i++) free(env.filenames[i]); free(env.filenames); for (i = 0; i < env.allow_filter_cnt; i++) { + free(env.allow_filters[i].any_glob); free(env.allow_filters[i].file_glob); free(env.allow_filters[i].prog_glob); } free(env.allow_filters); for (i = 0; i < env.deny_filter_cnt; i++) { + free(env.deny_filters[i].any_glob); free(env.deny_filters[i].file_glob); free(env.deny_filters[i].prog_glob); } diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index a29aa05ebb3e..316a56d680f2 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -21,6 +21,12 @@ x86_64) QEMU_FLAGS=(-cpu host -smp 8) BZIMAGE="arch/x86/boot/bzImage" ;; +aarch64) + QEMU_BINARY=qemu-system-aarch64 + QEMU_CONSOLE="ttyAMA0,115200" + QEMU_FLAGS=(-M virt,gic-version=3 -cpu host -smp 8) + BZIMAGE="arch/arm64/boot/Image" + ;; *) echo "Unsupported architecture" exit 1 diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c index ff35320d2be9..410a1385a01d 100644 --- a/tools/testing/selftests/bpf/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -104,7 +104,8 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 * { "tc", no_argument, NULL, 'c' }, { NULL, 0, NULL, 0 }, }; - unsigned long mss4, mss6, wscale, ttl; + unsigned long mss4, wscale, ttl; + unsigned long long mss6; unsigned int tcpipopts_mask = 0; if (argc < 2) @@ -286,7 +287,7 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports prog_info = (struct bpf_prog_info) { .nr_map_ids = 8, - .map_ids = (__u64)map_ids, + .map_ids = (__u64)(unsigned long)map_ids, }; info_len = sizeof(prog_info); diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index 0b3ff49c740d..39d349509ba4 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -33,6 +33,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> #include "xsk.h" +#include "bpf_util.h" #ifndef SOL_XDP #define SOL_XDP 283 @@ -521,25 +522,6 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk) return 0; } -/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst - * is zero-terminated string no matter what (unless sz == 0, in which case - * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs - * in what is returned. Given this is internal helper, it's trivial to extend - * this, when necessary. Use this instead of strncpy inside libbpf source code. - */ -static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) -{ - size_t i; - - if (sz == 0) - return; - - sz--; - for (i = 0; i < sz && src[i]; i++) - dst[i] = src[i]; - dst[i] = '\0'; -} - static int xsk_get_max_queues(struct xsk_socket *xsk) { struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; @@ -552,7 +534,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) return -errno; ifr.ifr_data = (void *)&channels; - libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); + bpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { ret = -errno; @@ -771,7 +753,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) } ctx->ifindex = ifindex; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); xsk->ctx = ctx; xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); @@ -958,7 +940,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->refcount = 1; ctx->umem = umem; ctx->queue_id = queue_id; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); ctx->fill = fill; ctx->comp = comp; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 681a5db80dae..162d3a516f2c 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -1006,7 +1006,8 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd { struct xsk_socket_info *xsk = ifobject->xsk; bool use_poll = ifobject->use_poll; - u32 i, idx = 0, ret, valid_pkts = 0; + u32 i, idx = 0, valid_pkts = 0; + int ret; while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 4c52cc6f2f9c..e8bbbdb77e0d 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -555,6 +555,7 @@ int proc_mount_contains(const char *option) ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) { char path[PATH_MAX]; + ssize_t ret; if (!pid) snprintf(path, sizeof(path), "/proc/%s/%s", @@ -562,8 +563,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t else snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); - size = read_text(path, buf, size); - return size < 0 ? -1 : size; + ret = read_text(path, buf, size); + return ret < 0 ? -1 : ret; } int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 526d2c42d870..186e1c26867e 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -16,7 +16,12 @@ skip_test() { [[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" # Set sched verbose flag, if available -[[ -d /sys/kernel/debug/sched ]] && echo Y > /sys/kernel/debug/sched/verbose +if [[ -d /sys/kernel/debug/sched ]] +then + # Used to restore the original setting during cleanup + SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose) + echo Y > /sys/kernel/debug/sched/verbose +fi # Get wait_inotify location WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify @@ -25,7 +30,7 @@ WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') [[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!" -CPUS=$(lscpu | grep "^CPU(s)" | sed -e "s/.*:[[:space:]]*//") +CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//") [[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!" # Set verbose flag and delay factor @@ -54,6 +59,15 @@ echo +cpuset > cgroup.subtree_control [[ -d test ]] || mkdir test cd test +cleanup() +{ + online_cpus + rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 + cd .. + rmdir test > /dev/null 2>&1 + echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose +} + # Pause in ms pause() { @@ -666,6 +680,7 @@ test_inotify() fi } +trap cleanup 0 2 3 6 run_state_test TEST_MATRIX test_isolated test_inotify diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 22b31ebb3513..258ddc565deb 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -19,12 +19,12 @@ /* - * Memory cgroup charging is performed using percpu batches 32 pages + * Memory cgroup charging is performed using percpu batches 64 pages * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So * the maximum discrepancy between charge and vmstat entries is number - * of cpus multiplied by 32 pages. + * of cpus multiplied by 64 pages. */ -#define MAX_VMSTAT_ERROR (4096 * 32 * get_nprocs()) +#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs()) static int alloc_dcache(const char *cgroup, void *arg) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 8833359556f3..1e616a8c6a9c 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -238,6 +238,8 @@ static int cg_test_proc_killed(const char *cgroup) return -1; } +static bool reclaim_until(const char *memcg, long goal); + /* * First, this test creates the following hierarchy: * A memory.min = 0, memory.max = 200M @@ -266,6 +268,12 @@ static int cg_test_proc_killed(const char *cgroup) * unprotected memory in A available, and checks that: * a) memory.min protects pagecache even in this case, * b) memory.low allows reclaiming page cache with low events. + * + * Then we try to reclaim from A/B/C using memory.reclaim until its + * usage reaches 10M. + * This makes sure that: + * (a) We ignore the protection of the reclaim target memcg. + * (b) The previously calculated emin value (~29M) should be dismissed. */ static int test_memcg_protection(const char *root, bool min) { @@ -385,6 +393,9 @@ static int test_memcg_protection(const char *root, bool min) if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) goto cleanup; + if (!reclaim_until(children[0], MB(10))) + goto cleanup; + if (min) { ret = KSFT_PASS; goto cleanup; @@ -646,6 +657,48 @@ cleanup: } /* + * Reclaim from @memcg until usage reaches @goal by writing to + * memory.reclaim. + * + * This function will return false if the usage is already below the + * goal. + * + * This function assumes that writing to memory.reclaim is the only + * source of change in memory.current (no concurrent allocations or + * reclaim). + * + * This function makes sure memory.reclaim is sane. It will return + * false if memory.reclaim's error codes do not make sense, even if + * the usage goal was satisfied. + */ +static bool reclaim_until(const char *memcg, long goal) +{ + char buf[64]; + int retries, err; + long current, to_reclaim; + bool reclaimed = false; + + for (retries = 5; retries > 0; retries--) { + current = cg_read_long(memcg, "memory.current"); + + if (current < goal || values_close(current, goal, 3)) + break; + /* Did memory.reclaim return 0 incorrectly? */ + else if (reclaimed) + return false; + + to_reclaim = current - goal; + snprintf(buf, sizeof(buf), "%ld", to_reclaim); + err = cg_write(memcg, "memory.reclaim", buf); + if (!err) + reclaimed = true; + else if (err != -EAGAIN) + return false; + } + return reclaimed; +} + +/* * This test checks that memory.reclaim reclaims the given * amount of memory (from both anon and file, if possible). */ @@ -653,8 +706,7 @@ static int test_memcg_reclaim(const char *root) { int ret = KSFT_FAIL, fd, retries; char *memcg; - long current, expected_usage, to_reclaim; - char buf[64]; + long current, expected_usage; memcg = cg_name(root, "memcg_test"); if (!memcg) @@ -705,41 +757,8 @@ static int test_memcg_reclaim(const char *root) * Reclaim until current reaches 30M, this makes sure we hit both anon * and file if swap is enabled. */ - retries = 5; - while (true) { - int err; - - current = cg_read_long(memcg, "memory.current"); - to_reclaim = current - MB(30); - - /* - * We only keep looping if we get EAGAIN, which means we could - * not reclaim the full amount. - */ - if (to_reclaim <= 0) - goto cleanup; - - - snprintf(buf, sizeof(buf), "%ld", to_reclaim); - err = cg_write(memcg, "memory.reclaim", buf); - if (!err) { - /* - * If writing succeeds, then the written amount should have been - * fully reclaimed (and maybe more). - */ - current = cg_read_long(memcg, "memory.current"); - if (!values_close(current, MB(30), 3) && current > MB(30)) - goto cleanup; - break; - } - - /* The kernel could not reclaim the full amount, try again. */ - if (err == -EAGAIN && retries--) - continue; - - /* We got an unexpected error or ran out of retries. */ + if (!reclaim_until(memcg, MB(30))) goto cleanup; - } ret = KSFT_PASS; cleanup: diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index a1fa2eff8192..b71247ba7196 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -7,6 +7,8 @@ TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh -TEST_PROGS += sysfs.sh +TEST_PROGS += debugfs_rm_non_contexts.sh +TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh +TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh new file mode 100644 index 000000000000..48b7af6b022c --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test putting non-ctx files/dirs to rm_contexts file +# =================================================== + +dmesg -C + +for file in "$DBGFS/"* +do + echo "$(basename "$f")" > "$DBGFS/rm_contexts" + if dmesg | grep -q BUG + then + dmesg + exit 1 + fi +done diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c index ad7a6b4cf338..a6fe0689f88d 100644 --- a/tools/testing/selftests/damon/huge_count_read_write.c +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -8,6 +8,13 @@ #include <unistd.h> #include <stdio.h> +#pragma GCC diagnostic push +#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1 +/* Ignore read(2) overflow and write(2) overread compile warnings */ +#pragma GCC diagnostic ignored "-Wstringop-overread" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif + void write_read_with_huge_count(char *file) { int filedesc = open(file, O_RDWR); @@ -27,6 +34,8 @@ void write_read_with_huge_count(char *file) close(filedesc); } +#pragma GCC diagnostic pop + int main(int argc, char *argv[]) { if (argc != 2) { diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh new file mode 100644 index 000000000000..61b80197c896 --- /dev/null +++ b/tools/testing/selftests/damon/lru_sort.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled" +if [ ! -f "$damon_lru_sort_enabled" ] +then + echo "No 'enabled' file. Maybe DAMON_LRU_SORT not built" + exit $ksft_skip +fi + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "Another kdamond is running" + exit $ksft_skip +fi + +echo Y > "$damon_lru_sort_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 1 ] +then + echo "kdamond is not turned on" + exit 1 +fi + +echo N > "$damon_lru_sort_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "kdamond is not turned off" + exit 1 +fi diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh new file mode 100644 index 000000000000..78dbc2334cbe --- /dev/null +++ b/tools/testing/selftests/damon/reclaim.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled" +if [ ! -f "$damon_reclaim_enabled" ] +then + echo "No 'enabled' file. Maybe DAMON_RECLAIM not built" + exit $ksft_skip +fi + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "Another kdamond is running" + exit $ksft_skip +fi + +echo Y > "$damon_reclaim_enabled" + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 1 ] +then + echo "kdamond is not turned on" + exit 1 +fi + +echo N > "$damon_reclaim_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "kdamond is not turned off" + exit 1 +fi diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 89592c64462f..db4942383a50 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -80,6 +80,12 @@ test_range() ensure_file "$range_dir/max" "exist" 600 } +test_tried_regions() +{ + tried_regions_dir=$1 + ensure_dir "$tried_regions_dir" "exist" +} + test_stats() { stats_dir=$1 @@ -138,6 +144,7 @@ test_scheme() test_quotas "$scheme_dir/quotas" test_watermarks "$scheme_dir/watermarks" test_stats "$scheme_dir/stats" + test_tried_regions "$scheme_dir/tried_regions" } test_schemes() diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh new file mode 100644 index 000000000000..ade35576e748 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_sysfs="/sys/kernel/mm/damon/admin" +if [ ! -d "$damon_sysfs" ] +then + echo "damon sysfs not found" + exit $ksft_skip +fi + +# clear log +dmesg -C + +# start DAMON with a scheme +echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts" +echo "vaddr" > "$damon_sysfs/kdamonds/0/contexts/0/operations" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/targets/nr_targets" +echo $$ > "$damon_sysfs/kdamonds/0/contexts/0/targets/0/pid_target" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" +scheme_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0" +echo 4096000 > "$scheme_dir/access_pattern/sz/max" +echo 20 > "$scheme_dir/access_pattern/nr_accesses/max" +echo 1024 > "$scheme_dir/access_pattern/age/max" +echo "on" > "$damon_sysfs/kdamonds/0/state" +sleep 0.3 + +# remove scheme sysfs dir +echo 0 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" + +# try to update stat of already removed scheme sysfs dir +echo "update_schemes_stats" > "$damon_sysfs/kdamonds/0/state" +if dmesg | grep -q BUG +then + echo "update_schemes_stats triggers a kernel bug" + dmesg + exit 1 +fi + +# try to update tried regions of already removed scheme sysfs dir +echo "update_schemes_tried_regions" > "$damon_sysfs/kdamonds/0/state" +if dmesg | grep -q BUG +then + echo "update_schemes_tried_regions triggers a kernel bug" + dmesg + exit 1 +fi + +echo "off" > "$damon_sysfs/kdamonds/0/state" diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 6b8d2e2f23c2..0f3921908b07 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -5,7 +5,9 @@ TEST_PROGS := \ bond-arp-interval-causes-panic.sh \ bond-break-lacpdu-tx.sh \ bond-lladdr-target.sh \ - dev_addr_lists.sh + dev_addr_lists.sh \ + mode-1-recovery-updelay.sh \ + mode-2-recovery-updelay.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 16c7fb858ac1..2a268b17b61f 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +NAMESPACES="" + # Test that a link aggregation device (bonding, team) removes the hardware # addresses that it adds on its underlying devices. test_LAG_cleanup() @@ -59,3 +61,107 @@ test_LAG_cleanup() log_test "$driver cleanup mode $mode" } + +# Build a generic 2 node net namespace with 2 connections +# between the namespaces +# +# +-----------+ +-----------+ +# | node1 | | node2 | +# | | | | +# | | | | +# | eth0 +-------+ eth0 | +# | | | | +# | eth1 +-------+ eth1 | +# | | | | +# +-----------+ +-----------+ +lag_setup2x2() +{ + local state=${1:-down} + local namespaces="lag_node1 lag_node2" + + # create namespaces + for n in ${namespaces}; do + ip netns add ${n} + done + + # wire up namespaces + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth0 + ip link set dev lag1-end netns lag_node2 $state name eth0 + + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth1 + ip link set dev lag1-end netns lag_node2 $state name eth1 + + NAMESPACES="${namespaces}" +} + +# cleanup all lag related namespaces and remove the bonding module +lag_cleanup() +{ + for n in ${NAMESPACES}; do + ip netns delete ${n} >/dev/null 2>&1 || true + done + modprobe -r bonding +} + +SWITCH="lag_node1" +CLIENT="lag_node2" +CLIENTIP="172.20.2.1" +SWITCHIP="172.20.2.2" + +lag_setup_network() +{ + lag_setup2x2 "down" + + # create switch + ip netns exec ${SWITCH} ip link add br0 up type bridge + ip netns exec ${SWITCH} ip link set eth0 master br0 up + ip netns exec ${SWITCH} ip link set eth1 master br0 up + ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0 +} + +lag_reset_network() +{ + ip netns exec ${CLIENT} ip link del bond0 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 up +} + +create_bond() +{ + # create client + ip netns exec ${CLIENT} ip link set eth0 down + ip netns exec ${CLIENT} ip link set eth1 down + + ip netns exec ${CLIENT} ip link add bond0 type bond $@ + ip netns exec ${CLIENT} ip link set eth0 master bond0 + ip netns exec ${CLIENT} ip link set eth1 master bond0 + ip netns exec ${CLIENT} ip link set bond0 up + ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0 +} + +test_bond_recovery() +{ + RET=0 + + create_bond $@ + + # verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + check_err $? "No connectivity" + + # force the links of the bond down + ip netns exec ${SWITCH} ip link set eth0 down + sleep 2 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 down + + # re-verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + + local rc=$? + check_err $rc "Bond failed to recover" + log_test "$1 ($2) bond recovery" + lag_reset_network +} diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh new file mode 100755 index 000000000000..ad4c845a4ac7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. (from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 1 miimon 100 updelay 0 +test_bond_recovery mode 1 miimon 100 updelay 200 +test_bond_recovery mode 1 miimon 100 updelay 500 +test_bond_recovery mode 1 miimon 100 updelay 1000 +test_bond_recovery mode 1 miimon 100 updelay 2000 +test_bond_recovery mode 1 miimon 100 updelay 5000 +test_bond_recovery mode 1 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh new file mode 100755 index 000000000000..2330d37453f9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. (from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 2 miimon 100 updelay 0 +test_bond_recovery mode 2 miimon 100 updelay 200 +test_bond_recovery mode 2 miimon 100 updelay 500 +test_bond_recovery mode 2 miimon 100 updelay 1000 +test_bond_recovery mode 2 miimon 100 updelay 2000 +test_bond_recovery mode 2 miimon 100 updelay 5000 +test_bond_recovery mode 2 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings index 867e118223cd..6091b45d226b 100644 --- a/tools/testing/selftests/drivers/net/bonding/settings +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -1 +1 @@ -timeout=60 +timeout=120 diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh index d3a891d421ab..64153bbf95df 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh @@ -83,6 +83,7 @@ ALL_TESTS=" ptp_general_test flow_action_sample_test flow_action_trap_test + eapol_test " NUM_NETIFS=4 source $lib_dir/lib.sh @@ -677,6 +678,27 @@ flow_action_trap_test() tc qdisc del dev $rp1 clsact } +eapol_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:03:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:8E:"$( : ETH type + ) + echo $p +} + +eapol_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "EAPOL" "eapol" $MZ $h1 -c 1 \ + $(eapol_payload_get $h1mac) -p 100 -q +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index a4c2812e9807..8d4b2c6265b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -14,6 +14,7 @@ ALL_TESTS=" ingress_stp_filter_test port_list_is_empty_test port_loopback_filter_test + locked_port_test " NUM_NETIFS=4 source $lib_dir/tc_common.sh @@ -420,6 +421,110 @@ port_loopback_filter_test() port_loopback_filter_uc_test } +locked_port_miss_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after adding an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after unlocking port" + + log_test "Locked port - FDB miss" + + devlink_trap_action_set $trap_name "drop" + bridge link set dev $swp1 learning on +} + +locked_port_mismatch_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + bridge fdb replace $smac dev $swp2 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after unlocking port" + + bridge link set dev $swp1 locked on + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after replacing an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + devlink_trap_action_set $trap_name "drop" + + log_test "Locked port - FDB mismatch" + + bridge link set dev $swp1 locked off + bridge link set dev $swp1 learning on +} + +locked_port_test() +{ + locked_port_miss_test + locked_port_mismatch_test +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh index f62ce479c266..878125041fc3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh @@ -31,7 +31,7 @@ # | 2001:db8:10::2/64 | # +-------------------------+ -lib_dir=$(dirname $0)/../../../../net/forwarding +lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" decap_error_test diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 04f03ae9d8fb..5e89657857c7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -34,6 +34,7 @@ ALL_TESTS=" nexthop_obj_bucket_offload_test nexthop_obj_blackhole_offload_test nexthop_obj_route_offload_test + bridge_locked_port_test devlink_reload_test " NUM_NETIFS=2 @@ -917,6 +918,36 @@ nexthop_obj_route_offload_test() simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 } +bridge_locked_port_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link set dev $swp1.10 master br1 + + bridge link set dev $swp1.10 locked on + check_fail $? "managed to set locked flag on a VLAN upper" + + ip link set dev $swp1.10 nomaster + ip link set dev $swp1 master br1 + + bridge link set dev $swp1 locked on + check_fail $? "managed to set locked flag on a bridge port that has a VLAN upper" + + ip link del dev $swp1.10 + bridge link set dev $swp1 locked on + + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_fail $? "managed to configure a VLAN upper on a locked port" + + log_test "bridge locked port" + + ip link del dev $swp1.10 &> /dev/null + ip link del dev br1 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index a90f394f9aa9..d374878cc0ba 100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -87,6 +87,11 @@ test_create_read() { local file=$efivarfs_mount/$FUNCNAME-$test_guid ./create-read $file + if [ $? -ne 0 ]; then + echo "create and read $file failed" + file_cleanup $file + exit 1 + fi file_cleanup $file } diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index 8d26d5505808..3eea2abf68f9 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -38,11 +38,18 @@ cnt_trace() { test_event_enabled() { val=$1 + check_times=10 # wait for 10 * SLEEP_TIME at most - e=`cat $EVENT_ENABLE` - if [ "$e" != $val ]; then - fail "Expected $val but found $e" - fi + while [ $check_times -ne 0 ]; do + e=`cat $EVENT_ENABLE` + if [ "$e" == $val ]; then + return 0 + fi + sleep $SLEEP_TIME + check_times=$((check_times - 1)) + done + + fail "Expected $val but found $e" } run_enable_disable() { diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5f6cbec847fc..779f3e62ec90 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -142,9 +142,15 @@ finish_ftrace() { check_requires() { # Check required files and tracers for i in "$@" ; do + p=${i%:program} r=${i%:README} t=${i%:tracer} - if [ $t != $i ]; then + if [ $p != $i ]; then + if ! which $p ; then + echo "Required program $p is not found." + exit_unresolved + fi + elif [ $t != $i ]; then if ! grep -wq $t available_tracers ; then echo "Required tracer $t is not configured." exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc index 22bff122b933..ba1038953873 100644 --- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc @@ -46,10 +46,10 @@ cat trace grep -q "tracer: preemptoff" trace || fail # Check the end of the section -egrep -q "5.....us : <stack trace>" trace || fail +grep -E -q "5.....us : <stack trace>" trace || fail # Check for 500ms of latency -egrep -q "latency: 5..... us" trace || fail +grep -E -q "latency: 5..... us" trace || fail reset_tracer @@ -69,10 +69,10 @@ cat trace grep -q "tracer: irqsoff" trace || fail # Check the end of the section -egrep -q "5.....us : <stack trace>" trace || fail +grep -E -q "5.....us : <stack trace>" trace || fail # Check for 500ms of latency -egrep -q "latency: 5..... us" trace || fail +grep -E -q "latency: 5..... us" trace || fail reset_tracer exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc index 11be10e1bf96..e8f0fac9a110 100644 --- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc @@ -1,12 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test wakeup tracer -# requires: wakeup:tracer - -if ! which chrt ; then - echo "chrt is not found. This test requires nice command." - exit_unresolved -fi +# requires: wakeup:tracer chrt:program echo wakeup > current_tracer echo 1 > tracing_on diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc index 3a77198b3c69..79807656785b 100644 --- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc @@ -1,12 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test wakeup RT tracer -# requires: wakeup_rt:tracer - -if ! which chrt ; then - echo "chrt is not found. This test requires chrt command." - exit_unresolved -fi +# requires: wakeup_rt:tracer chrt:program echo wakeup_rt > current_tracer echo 1 > tracing_on diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc index 41119e0440e9..04c5dd7d0acc 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test field variable support -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc index 9098f1e7433f..f7447d800899 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event combined histogram trigger -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc index adaabb873ed4..91339c130832 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onchange action -# requires: set_event "onchange(var)":README +# requires: set_event "onchange(var)":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc index 20e39471052e..d645abcf11c4 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmatch action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc index f4b03ab7c287..c369247efb35 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmatch-onmax action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc index 71c9b5911c70..e28dc5f11b2b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmax action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc index 67fa328b830f..147967e86584 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger snapshot action -# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README +# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc index 6461c375694f..c2a8ab01e13b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc @@ -14,7 +14,7 @@ FIELD="filename" SYNTH="synth_open" EPROBE="eprobe_open" -echo "$SYNTH u64 filename; s64 ret;" > synthetic_events +echo "$SYNTH unsigned long filename; long ret;" > synthetic_events echo "hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger echo "hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc index 3d65c856eca3..213d890ed188 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action with dynamic string param -# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc index c126d2350a6d..d7312047ce28 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README +# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index 341e3de00896..9f539d454ee4 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -27,7 +27,7 @@ remove_chip() { continue fi - LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | egrep ^line` + LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | grep -E ^line` if [ "$?" = 0 ]; then for LINE in $LINES; do if [ -e $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog ]; then diff --git a/tools/testing/selftests/iommu/.gitignore b/tools/testing/selftests/iommu/.gitignore new file mode 100644 index 000000000000..7d0703049eba --- /dev/null +++ b/tools/testing/selftests/iommu/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +/iommufd +/iommufd_fail_nth diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile new file mode 100644 index 000000000000..7cb74d26f141 --- /dev/null +++ b/tools/testing/selftests/iommu/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -Wall -O2 -Wno-unused-function +CFLAGS += -I../../../../include/uapi/ +CFLAGS += -I../../../../include/ + +CFLAGS += -D_GNU_SOURCE + +TEST_GEN_PROGS := +TEST_GEN_PROGS += iommufd +TEST_GEN_PROGS += iommufd_fail_nth + +include ../lib.mk diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config new file mode 100644 index 000000000000..6c4f901d6fed --- /dev/null +++ b/tools/testing/selftests/iommu/config @@ -0,0 +1,2 @@ +CONFIG_IOMMUFD +CONFIG_IOMMUFD_TEST diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c new file mode 100644 index 000000000000..8aa8a346cf22 --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd.c @@ -0,0 +1,1654 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/eventfd.h> + +#define __EXPORTED_HEADERS__ +#include <linux/vfio.h> + +#include "iommufd_utils.h" + +static void *buffer; + +static unsigned long PAGE_SIZE; +static unsigned long HUGEPAGE_SIZE; + +#define MOCK_PAGE_SIZE (PAGE_SIZE / 2) + +static unsigned long get_huge_page_size(void) +{ + char buf[80]; + int ret; + int fd; + + fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", + O_RDONLY); + if (fd < 0) + return 2 * 1024 * 1024; + + ret = read(fd, buf, sizeof(buf)); + close(fd); + if (ret <= 0 || ret == sizeof(buf)) + return 2 * 1024 * 1024; + buf[ret] = 0; + return strtoul(buf, NULL, 10); +} + +static __attribute__((constructor)) void setup_sizes(void) +{ + void *vrc; + int rc; + + PAGE_SIZE = sysconf(_SC_PAGE_SIZE); + HUGEPAGE_SIZE = get_huge_page_size(); + + BUFFER_SIZE = PAGE_SIZE * 16; + rc = posix_memalign(&buffer, HUGEPAGE_SIZE, BUFFER_SIZE); + assert(!rc); + assert(buffer); + assert((uintptr_t)buffer % HUGEPAGE_SIZE == 0); + vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + assert(vrc == buffer); +} + +FIXTURE(iommufd) +{ + int fd; +}; + +FIXTURE_SETUP(iommufd) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); +} + +FIXTURE_TEARDOWN(iommufd) +{ + teardown_iommufd(self->fd, _metadata); +} + +TEST_F(iommufd, simple_close) +{ +} + +TEST_F(iommufd, cmd_fail) +{ + struct iommu_destroy cmd = { .size = sizeof(cmd), .id = 0 }; + + /* object id is invalid */ + EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, 0)); + /* Bad pointer */ + EXPECT_ERRNO(EFAULT, ioctl(self->fd, IOMMU_DESTROY, NULL)); + /* Unknown ioctl */ + EXPECT_ERRNO(ENOTTY, + ioctl(self->fd, _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE - 1), + &cmd)); +} + +TEST_F(iommufd, cmd_length) +{ +#define TEST_LENGTH(_struct, _ioctl) \ + { \ + struct { \ + struct _struct cmd; \ + uint8_t extra; \ + } cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \ + .extra = UINT8_MAX }; \ + int old_errno; \ + int rc; \ + \ + EXPECT_ERRNO(EINVAL, ioctl(self->fd, _ioctl, &cmd)); \ + cmd.cmd.size = sizeof(struct _struct) + 1; \ + EXPECT_ERRNO(E2BIG, ioctl(self->fd, _ioctl, &cmd)); \ + cmd.cmd.size = sizeof(struct _struct); \ + rc = ioctl(self->fd, _ioctl, &cmd); \ + old_errno = errno; \ + cmd.cmd.size = sizeof(struct _struct) + 1; \ + cmd.extra = 0; \ + if (rc) { \ + EXPECT_ERRNO(old_errno, \ + ioctl(self->fd, _ioctl, &cmd)); \ + } else { \ + ASSERT_EQ(0, ioctl(self->fd, _ioctl, &cmd)); \ + } \ + } + + TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); + TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); + TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); + TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); + TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP); + TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY); + TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP); + TEST_LENGTH(iommu_option, IOMMU_OPTION); + TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS); +#undef TEST_LENGTH +} + +TEST_F(iommufd, cmd_ex_fail) +{ + struct { + struct iommu_destroy cmd; + __u64 future; + } cmd = { .cmd = { .size = sizeof(cmd), .id = 0 } }; + + /* object id is invalid and command is longer */ + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* future area is non-zero */ + cmd.future = 1; + EXPECT_ERRNO(E2BIG, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* Original command "works" */ + cmd.cmd.size = sizeof(cmd.cmd); + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* Short command fails */ + cmd.cmd.size = sizeof(cmd.cmd) - 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_DESTROY, &cmd)); +} + +TEST_F(iommufd, global_options) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_GET, + .val64 = 1, + }; + + cmd.option_id = IOMMU_OPTION_RLIMIT_MODE; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(0, cmd.val64); + + /* This requires root */ + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + cmd.val64 = 2; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(1, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + cmd.option_id = IOMMU_OPTION_HUGE_PAGES; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); + cmd.op = IOMMU_OPTION_OP_SET; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); +} + +FIXTURE(iommufd_ioas) +{ + int fd; + uint32_t ioas_id; + uint32_t domain_id; + uint64_t base_iova; +}; + +FIXTURE_VARIANT(iommufd_ioas) +{ + unsigned int mock_domains; + unsigned int memory_limit; +}; + +FIXTURE_SETUP(iommufd_ioas) +{ + unsigned int i; + + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + if (!variant->memory_limit) { + test_ioctl_set_default_memory_limit(); + } else { + test_ioctl_set_temp_memory_limit(variant->memory_limit); + } + + for (i = 0; i != variant->mock_domains; i++) { + test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_id); + self->base_iova = MOCK_APERTURE_START; + } +} + +FIXTURE_TEARDOWN(iommufd_ioas) +{ + test_ioctl_set_default_memory_limit(); + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain) +{ +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain) +{ + .mock_domains = 1, +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain) +{ + .mock_domains = 2, +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain_limit) +{ + .mock_domains = 1, + .memory_limit = 16, +}; + +TEST_F(iommufd_ioas, ioas_auto_destroy) +{ +} + +TEST_F(iommufd_ioas, ioas_destroy) +{ + if (self->domain_id) { + /* IOAS cannot be freed while a domain is on it */ + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->ioas_id)); + } else { + /* Can allocate and manually free an IOAS table */ + test_ioctl_destroy(self->ioas_id); + } +} + +TEST_F(iommufd_ioas, ioas_area_destroy) +{ + /* Adding an area does not change ability to destroy */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova); + if (self->domain_id) + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->ioas_id)); + else + test_ioctl_destroy(self->ioas_id); +} + +TEST_F(iommufd_ioas, ioas_area_auto_destroy) +{ + int i; + + /* Can allocate and automatically free an IOAS table with many areas */ + for (i = 0; i != 10; i++) { + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, + self->base_iova + i * PAGE_SIZE); + } +} + +TEST_F(iommufd_ioas, area) +{ + int i; + + /* Unmap fails if nothing is mapped */ + for (i = 0; i != 10; i++) + test_err_ioctl_ioas_unmap(ENOENT, i * PAGE_SIZE, PAGE_SIZE); + + /* Unmap works */ + for (i = 0; i != 10; i++) + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, + self->base_iova + i * PAGE_SIZE); + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(self->base_iova + i * PAGE_SIZE, + PAGE_SIZE); + + /* Split fails */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE * 2, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 16 * PAGE_SIZE, + PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 17 * PAGE_SIZE, + PAGE_SIZE); + + /* Over map fails */ + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE, + self->base_iova + 17 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2, + self->base_iova + 15 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 3, + self->base_iova + 15 * PAGE_SIZE); + + /* unmap all works */ + test_ioctl_ioas_unmap(0, UINT64_MAX); + + /* Unmap all succeeds on an empty IOAS */ + test_ioctl_ioas_unmap(0, UINT64_MAX); +} + +TEST_F(iommufd_ioas, unmap_fully_contained_areas) +{ + uint64_t unmap_len; + int i; + + /* Give no_domain some space to rewind base_iova */ + self->base_iova += 4 * PAGE_SIZE; + + for (i = 0; i != 4; i++) + test_ioctl_ioas_map_fixed(buffer, 8 * PAGE_SIZE, + self->base_iova + i * 16 * PAGE_SIZE); + + /* Unmap not fully contained area doesn't work */ + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova - 4 * PAGE_SIZE, + 8 * PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, + self->base_iova + 3 * 16 * PAGE_SIZE + + 8 * PAGE_SIZE - 4 * PAGE_SIZE, + 8 * PAGE_SIZE); + + /* Unmap fully contained areas works */ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, + self->base_iova - 4 * PAGE_SIZE, + 3 * 16 * PAGE_SIZE + 8 * PAGE_SIZE + + 4 * PAGE_SIZE, + &unmap_len)); + ASSERT_EQ(32 * PAGE_SIZE, unmap_len); +} + +TEST_F(iommufd_ioas, area_auto_iova) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE * 4, + .length = PAGE_SIZE * 100 }, + }; + struct iommu_iova_range ranges[1] = {}; + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = self->ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + __u64 iovas[10]; + int i; + + /* Simple 4k pages */ + for (i = 0; i != 10; i++) + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iovas[i]); + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE); + + /* Kernel automatically aligns IOVAs properly */ + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + if (self->domain_id) { + test_ioctl_ioas_map(buffer, length, &iovas[i]); + } else { + test_ioctl_ioas_map((void *)(1UL << 31), length, + &iovas[i]); + } + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); + + /* Avoids a reserved region */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + test_ioctl_ioas_map(buffer, length, &iovas[i]); + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + EXPECT_EQ(false, + iovas[i] > test_cmd.add_reserved.start && + iovas[i] < + test_cmd.add_reserved.start + + test_cmd.add_reserved.length); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); + + /* Allowed region intersects with a reserved region */ + ranges[0].start = PAGE_SIZE; + ranges[0].last = PAGE_SIZE * 600; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + + /* Allocate from an allowed region */ + if (self->domain_id) { + ranges[0].start = MOCK_APERTURE_START + PAGE_SIZE; + ranges[0].last = MOCK_APERTURE_START + PAGE_SIZE * 600 - 1; + } else { + ranges[0].start = PAGE_SIZE * 200; + ranges[0].last = PAGE_SIZE * 600 - 1; + } + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + test_ioctl_ioas_map(buffer, length, &iovas[i]); + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + EXPECT_EQ(true, iovas[i] >= ranges[0].start); + EXPECT_EQ(true, iovas[i] <= ranges[0].last); + EXPECT_EQ(true, iovas[i] + length > ranges[0].start); + EXPECT_EQ(true, iovas[i] + length <= ranges[0].last + 1); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); +} + +TEST_F(iommufd_ioas, area_allowed) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE * 4, + .length = PAGE_SIZE * 100 }, + }; + struct iommu_iova_range ranges[1] = {}; + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = self->ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + + /* Reserved intersects an allowed */ + allow_cmd.num_iovas = 1; + ranges[0].start = self->base_iova; + ranges[0].last = ranges[0].start + PAGE_SIZE * 600; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + test_cmd.add_reserved.start = ranges[0].start + PAGE_SIZE; + test_cmd.add_reserved.length = PAGE_SIZE; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + allow_cmd.num_iovas = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + + /* Allowed intersects a reserved */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + allow_cmd.num_iovas = 1; + ranges[0].start = self->base_iova; + ranges[0].last = ranges[0].start + PAGE_SIZE * 600; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); +} + +TEST_F(iommufd_ioas, copy_area) +{ + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .dst_ioas_id = self->ioas_id, + .src_ioas_id = self->ioas_id, + .length = PAGE_SIZE, + }; + + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova); + + /* Copy inside a single IOAS */ + copy_cmd.src_iova = self->base_iova; + copy_cmd.dst_iova = self->base_iova + PAGE_SIZE; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + + /* Copy between IOAS's */ + copy_cmd.src_iova = self->base_iova; + copy_cmd.dst_iova = 0; + test_ioctl_ioas_alloc(©_cmd.dst_ioas_id); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); +} + +TEST_F(iommufd_ioas, iova_ranges) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE, .length = PAGE_SIZE }, + }; + struct iommu_iova_range *ranges = buffer; + struct iommu_ioas_iova_ranges ranges_cmd = { + .size = sizeof(ranges_cmd), + .ioas_id = self->ioas_id, + .num_iovas = BUFFER_SIZE / sizeof(*ranges), + .allowed_iovas = (uintptr_t)ranges, + }; + + /* Range can be read */ + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + if (!self->domain_id) { + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(SIZE_MAX, ranges[0].last); + EXPECT_EQ(1, ranges_cmd.out_iova_alignment); + } else { + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + EXPECT_EQ(MOCK_PAGE_SIZE, ranges_cmd.out_iova_alignment); + } + + /* Buffer too small */ + memset(ranges, 0, BUFFER_SIZE); + ranges_cmd.num_iovas = 0; + EXPECT_ERRNO(EMSGSIZE, + ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(0, ranges[0].last); + + /* 2 ranges */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + ranges_cmd.num_iovas = BUFFER_SIZE / sizeof(*ranges); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + if (!self->domain_id) { + EXPECT_EQ(2, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last); + EXPECT_EQ(PAGE_SIZE * 2, ranges[1].start); + EXPECT_EQ(SIZE_MAX, ranges[1].last); + } else { + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + } + + /* Buffer too small */ + memset(ranges, 0, BUFFER_SIZE); + ranges_cmd.num_iovas = 1; + if (!self->domain_id) { + EXPECT_ERRNO(EMSGSIZE, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, + &ranges_cmd)); + EXPECT_EQ(2, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last); + } else { + ASSERT_EQ(0, + ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + } + EXPECT_EQ(0, ranges[1].start); + EXPECT_EQ(0, ranges[1].last); +} + +TEST_F(iommufd_ioas, access_pin) +{ + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + struct iommu_test_cmd check_map_cmd = { + .size = sizeof(check_map_cmd), + .op = IOMMU_TEST_OP_MD_CHECK_MAP, + .check_map = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + uint32_t access_pages_id; + unsigned int npages; + + test_cmd_create_access(self->ioas_id, &access_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + + for (npages = 1; npages < BUFFER_SIZE / PAGE_SIZE; npages++) { + uint32_t mock_device_id; + uint32_t mock_hwpt_id; + + access_cmd.access_pages.length = npages * PAGE_SIZE; + + /* Single map/unmap */ + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + + /* Double user */ + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + access_pages_id = access_cmd.access_pages.out_access_pages_id; + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + test_cmd_destroy_access_pages(access_cmd.id, access_pages_id); + + /* Add/remove a domain with a user */ + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_mock_domain(self->ioas_id, &mock_device_id, + &mock_hwpt_id); + check_map_cmd.id = mock_hwpt_id; + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), + &check_map_cmd)); + + test_ioctl_destroy(mock_device_id); + test_ioctl_destroy(mock_hwpt_id); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + + test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE); + } + test_cmd_destroy_access(access_cmd.id); +} + +TEST_F(iommufd_ioas, access_pin_unmap) +{ + struct iommu_test_cmd access_pages_cmd = { + .size = sizeof(access_pages_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + test_cmd_create_access(self->ioas_id, &access_pages_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, MOCK_APERTURE_START); + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_pages_cmd)); + + /* Trigger the unmap op */ + test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE); + + /* kernel removed the item for us */ + test_err_destroy_access_pages( + ENOENT, access_pages_cmd.id, + access_pages_cmd.access_pages.out_access_pages_id); +} + +static void check_access_rw(struct __test_metadata *_metadata, int fd, + unsigned int access_id, uint64_t iova, + unsigned int def_flags) +{ + uint16_t tmp[32]; + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = access_id, + .access_rw = { .uptr = (uintptr_t)tmp }, + }; + uint16_t *buffer16 = buffer; + unsigned int i; + void *tmp2; + + for (i = 0; i != BUFFER_SIZE / sizeof(*buffer16); i++) + buffer16[i] = rand(); + + for (access_cmd.access_rw.iova = iova + PAGE_SIZE - 50; + access_cmd.access_rw.iova < iova + PAGE_SIZE + 50; + access_cmd.access_rw.iova++) { + for (access_cmd.access_rw.length = 1; + access_cmd.access_rw.length < sizeof(tmp); + access_cmd.access_rw.length++) { + access_cmd.access_rw.flags = def_flags; + ASSERT_EQ(0, ioctl(fd, + _IOMMU_TEST_CMD( + IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, + memcmp(buffer + (access_cmd.access_rw.iova - + iova), + tmp, access_cmd.access_rw.length)); + + for (i = 0; i != ARRAY_SIZE(tmp); i++) + tmp[i] = rand(); + access_cmd.access_rw.flags = def_flags | + MOCK_ACCESS_RW_WRITE; + ASSERT_EQ(0, ioctl(fd, + _IOMMU_TEST_CMD( + IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, + memcmp(buffer + (access_cmd.access_rw.iova - + iova), + tmp, access_cmd.access_rw.length)); + } + } + + /* Multi-page test */ + tmp2 = malloc(BUFFER_SIZE); + ASSERT_NE(NULL, tmp2); + access_cmd.access_rw.iova = iova; + access_cmd.access_rw.length = BUFFER_SIZE; + access_cmd.access_rw.flags = def_flags; + access_cmd.access_rw.uptr = (uintptr_t)tmp2; + ASSERT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, memcmp(buffer, tmp2, access_cmd.access_rw.length)); + free(tmp2); +} + +TEST_F(iommufd_ioas, access_rw) +{ + __u32 access_id; + __u64 iova; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + test_ioctl_ioas_map(buffer, BUFFER_SIZE, &iova); + check_access_rw(_metadata, self->fd, access_id, iova, 0); + check_access_rw(_metadata, self->fd, access_id, iova, + MOCK_ACCESS_RW_SLOW_PATH); + test_ioctl_ioas_unmap(iova, BUFFER_SIZE); + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, access_rw_unaligned) +{ + __u32 access_id; + __u64 iova; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + /* Unaligned pages */ + iova = self->base_iova + MOCK_PAGE_SIZE; + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, iova); + check_access_rw(_metadata, self->fd, access_id, iova, 0); + test_ioctl_ioas_unmap(iova, BUFFER_SIZE); + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, fork_gone) +{ + __u32 access_id; + pid_t child; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + /* Create a mapping with a different mm */ + child = fork(); + if (!child) { + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + exit(0); + } + ASSERT_NE(-1, child); + ASSERT_EQ(child, waitpid(child, NULL, 0)); + + if (self->domain_id) { + /* + * If a domain already existed then everything was pinned within + * the fork, so this copies from one domain to another. + */ + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + check_access_rw(_metadata, self->fd, access_id, + MOCK_APERTURE_START, 0); + + } else { + /* + * Otherwise we need to actually pin pages which can't happen + * since the fork is gone. + */ + test_err_mock_domain(EFAULT, self->ioas_id, NULL, NULL); + } + + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, fork_present) +{ + __u32 access_id; + int pipefds[2]; + uint64_t tmp; + pid_t child; + int efd; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + ASSERT_EQ(0, pipe2(pipefds, O_CLOEXEC)); + efd = eventfd(0, EFD_CLOEXEC); + ASSERT_NE(-1, efd); + + /* Create a mapping with a different mm */ + child = fork(); + if (!child) { + __u64 iova; + uint64_t one = 1; + + close(pipefds[1]); + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + if (write(efd, &one, sizeof(one)) != sizeof(one)) + exit(100); + if (read(pipefds[0], &iova, 1) != 1) + exit(100); + exit(0); + } + close(pipefds[0]); + ASSERT_NE(-1, child); + ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp))); + + /* Read pages from the remote process */ + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); + + ASSERT_EQ(0, close(pipefds[1])); + ASSERT_EQ(child, waitpid(child, NULL, 0)); + + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, ioas_option_huge_pages) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_GET, + .val64 = 3, + .object_id = self->ioas_id, + }; + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(1, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + cmd.val64 = 3; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(0, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 2; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); +} + +TEST_F(iommufd_ioas, ioas_iova_alloc) +{ + unsigned int length; + __u64 iova; + + for (length = 1; length != PAGE_SIZE * 2; length++) { + if (variant->mock_domains && (length % MOCK_PAGE_SIZE)) { + test_err_ioctl_ioas_map(EINVAL, buffer, length, &iova); + } else { + test_ioctl_ioas_map(buffer, length, &iova); + test_ioctl_ioas_unmap(iova, length); + } + } +} + +TEST_F(iommufd_ioas, ioas_align_change) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_SET, + .object_id = self->ioas_id, + /* 0 means everything must be aligned to PAGE_SIZE */ + .val64 = 0, + }; + + /* + * We cannot upgrade the alignment using OPTION_HUGE_PAGES when a domain + * and map are present. + */ + if (variant->mock_domains) + return; + + /* + * We can upgrade to PAGE_SIZE alignment when things are aligned right + */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, MOCK_APERTURE_START); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Misalignment is rejected at map time */ + test_err_ioctl_ioas_map_fixed(EINVAL, buffer + MOCK_PAGE_SIZE, + PAGE_SIZE, + MOCK_APERTURE_START + PAGE_SIZE); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Reduce alignment */ + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Confirm misalignment is rejected during alignment upgrade */ + test_ioctl_ioas_map_fixed(buffer + MOCK_PAGE_SIZE, PAGE_SIZE, + MOCK_APERTURE_START + PAGE_SIZE); + cmd.val64 = 0; + EXPECT_ERRNO(EADDRINUSE, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + test_ioctl_ioas_unmap(MOCK_APERTURE_START + PAGE_SIZE, PAGE_SIZE); + test_ioctl_ioas_unmap(MOCK_APERTURE_START, PAGE_SIZE); +} + +TEST_F(iommufd_ioas, copy_sweep) +{ + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .src_ioas_id = self->ioas_id, + .dst_iova = MOCK_APERTURE_START, + .length = MOCK_PAGE_SIZE, + }; + unsigned int dst_ioas_id; + uint64_t last_iova; + uint64_t iova; + + test_ioctl_ioas_alloc(&dst_ioas_id); + copy_cmd.dst_ioas_id = dst_ioas_id; + + if (variant->mock_domains) + last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 1; + else + last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 2; + + test_ioctl_ioas_map_fixed(buffer, last_iova - MOCK_APERTURE_START + 1, + MOCK_APERTURE_START); + + for (iova = MOCK_APERTURE_START - PAGE_SIZE; iova <= last_iova; + iova += 511) { + copy_cmd.src_iova = iova; + if (iova < MOCK_APERTURE_START || + iova + copy_cmd.length - 1 > last_iova) { + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_IOAS_COPY, + ©_cmd)); + } else { + ASSERT_EQ(0, + ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + test_ioctl_ioas_unmap_id(dst_ioas_id, copy_cmd.dst_iova, + copy_cmd.length); + } + } + + test_ioctl_destroy(dst_ioas_id); +} + +FIXTURE(iommufd_mock_domain) +{ + int fd; + uint32_t ioas_id; + uint32_t domain_id; + uint32_t domain_ids[2]; + int mmap_flags; + size_t mmap_buf_size; +}; + +FIXTURE_VARIANT(iommufd_mock_domain) +{ + unsigned int mock_domains; + bool hugepages; +}; + +FIXTURE_SETUP(iommufd_mock_domain) +{ + unsigned int i; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + ASSERT_GE(ARRAY_SIZE(self->domain_ids), variant->mock_domains); + + for (i = 0; i != variant->mock_domains; i++) + test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_ids[i]); + self->domain_id = self->domain_ids[0]; + + self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; + self->mmap_buf_size = PAGE_SIZE * 8; + if (variant->hugepages) { + /* + * MAP_POPULATE will cause the kernel to fail mmap if THPs are + * not available. + */ + self->mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + self->mmap_buf_size = HUGEPAGE_SIZE * 2; + } +} + +FIXTURE_TEARDOWN(iommufd_mock_domain) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain) +{ + .mock_domains = 1, + .hugepages = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains) +{ + .mock_domains = 2, + .hugepages = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage) +{ + .mock_domains = 1, + .hugepages = true, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) +{ + .mock_domains = 2, + .hugepages = true, +}; + +/* Have the kernel check that the user pages made it to the iommu_domain */ +#define check_mock_iova(_ptr, _iova, _length) \ + ({ \ + struct iommu_test_cmd check_map_cmd = { \ + .size = sizeof(check_map_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_MAP, \ + .id = self->domain_id, \ + .check_map = { .iova = _iova, \ + .length = _length, \ + .uptr = (uintptr_t)(_ptr) }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), \ + &check_map_cmd)); \ + if (self->domain_ids[1]) { \ + check_map_cmd.id = self->domain_ids[1]; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD( \ + IOMMU_TEST_OP_MD_CHECK_MAP), \ + &check_map_cmd)); \ + } \ + }) + +TEST_F(iommufd_mock_domain, basic) +{ + size_t buf_size = self->mmap_buf_size; + uint8_t *buf; + __u64 iova; + + /* Simple one page map */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + check_mock_iova(buffer, iova, PAGE_SIZE); + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + + /* EFAULT half way through mapping */ + ASSERT_EQ(0, munmap(buf + buf_size / 2, buf_size / 2)); + test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); + + /* EFAULT on first page */ + ASSERT_EQ(0, munmap(buf, buf_size / 2)); + test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); +} + +TEST_F(iommufd_mock_domain, ro_unshare) +{ + uint8_t *buf; + __u64 iova; + int fd; + + fd = open("/proc/self/exe", O_RDONLY); + ASSERT_NE(-1, fd); + + buf = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + ASSERT_NE(MAP_FAILED, buf); + close(fd); + + /* + * There have been lots of changes to the "unshare" mechanism in + * get_user_pages(), make sure it works right. The write to the page + * after we map it for reading should not change the assigned PFN. + */ + ASSERT_EQ(0, + _test_ioctl_ioas_map(self->fd, self->ioas_id, buf, PAGE_SIZE, + &iova, IOMMU_IOAS_MAP_READABLE)); + check_mock_iova(buf, iova, PAGE_SIZE); + memset(buf, 1, PAGE_SIZE); + check_mock_iova(buf, iova, PAGE_SIZE); + ASSERT_EQ(0, munmap(buf, PAGE_SIZE)); +} + +TEST_F(iommufd_mock_domain, all_aligns) +{ + size_t test_step = variant->hugepages ? (self->mmap_buf_size / 16) : + MOCK_PAGE_SIZE; + size_t buf_size = self->mmap_buf_size; + unsigned int start; + unsigned int end; + uint8_t *buf; + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + check_refs(buf, buf_size, 0); + + /* + * Map every combination of page size and alignment within a big region, + * less for hugepage case as it takes so long to finish. + */ + for (start = 0; start < buf_size; start += test_step) { + if (variant->hugepages) + end = buf_size; + else + end = start + MOCK_PAGE_SIZE; + for (; end < buf_size; end += MOCK_PAGE_SIZE) { + size_t length = end - start; + __u64 iova; + + test_ioctl_ioas_map(buf + start, length, &iova); + check_mock_iova(buf + start, iova, length); + check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, + end / PAGE_SIZE * PAGE_SIZE - + start / PAGE_SIZE * PAGE_SIZE, + 1); + + test_ioctl_ioas_unmap(iova, length); + } + } + check_refs(buf, buf_size, 0); + ASSERT_EQ(0, munmap(buf, buf_size)); +} + +TEST_F(iommufd_mock_domain, all_aligns_copy) +{ + size_t test_step = variant->hugepages ? self->mmap_buf_size / 16 : + MOCK_PAGE_SIZE; + size_t buf_size = self->mmap_buf_size; + unsigned int start; + unsigned int end; + uint8_t *buf; + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + check_refs(buf, buf_size, 0); + + /* + * Map every combination of page size and alignment within a big region, + * less for hugepage case as it takes so long to finish. + */ + for (start = 0; start < buf_size; start += test_step) { + if (variant->hugepages) + end = buf_size; + else + end = start + MOCK_PAGE_SIZE; + for (; end < buf_size; end += MOCK_PAGE_SIZE) { + size_t length = end - start; + unsigned int old_id; + uint32_t mock_device_id; + __u64 iova; + + test_ioctl_ioas_map(buf + start, length, &iova); + + /* Add and destroy a domain while the area exists */ + old_id = self->domain_ids[1]; + test_cmd_mock_domain(self->ioas_id, &mock_device_id, + &self->domain_ids[1]); + + check_mock_iova(buf + start, iova, length); + check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, + end / PAGE_SIZE * PAGE_SIZE - + start / PAGE_SIZE * PAGE_SIZE, + 1); + + test_ioctl_destroy(mock_device_id); + test_ioctl_destroy(self->domain_ids[1]); + self->domain_ids[1] = old_id; + + test_ioctl_ioas_unmap(iova, length); + } + } + check_refs(buf, buf_size, 0); + ASSERT_EQ(0, munmap(buf, buf_size)); +} + +TEST_F(iommufd_mock_domain, user_copy) +{ + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .dst_ioas_id = self->ioas_id, + .dst_iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + }; + unsigned int ioas_id; + + /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ + test_ioctl_ioas_alloc(&ioas_id); + test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE, + ©_cmd.src_iova); + + test_cmd_create_access(ioas_id, &access_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + + access_cmd.access_pages.iova = copy_cmd.src_iova; + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + copy_cmd.src_ioas_id = ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + + test_cmd_destroy_access_pages( + access_cmd.id, access_cmd.access_pages.out_access_pages_id); + test_cmd_destroy_access(access_cmd.id) test_ioctl_destroy(ioas_id); + + test_ioctl_destroy(ioas_id); +} + +/* VFIO compatibility IOCTLs */ + +TEST_F(iommufd, simple_ioctls) +{ + ASSERT_EQ(VFIO_API_VERSION, ioctl(self->fd, VFIO_GET_API_VERSION)); + ASSERT_EQ(1, ioctl(self->fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)); +} + +TEST_F(iommufd, unmap_cmd) +{ + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .iova = MOCK_APERTURE_START, + .size = PAGE_SIZE, + }; + + unmap_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.argsz = sizeof(unmap_cmd); + unmap_cmd.flags = 1 << 31; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.flags = 0; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); +} + +TEST_F(iommufd, map_cmd) +{ + struct vfio_iommu_type1_dma_map map_cmd = { + .iova = MOCK_APERTURE_START, + .size = PAGE_SIZE, + .vaddr = (__u64)buffer, + }; + + map_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + map_cmd.argsz = sizeof(map_cmd); + map_cmd.flags = 1 << 31; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + /* Requires a domain to be attached */ + map_cmd.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); +} + +TEST_F(iommufd, info_cmd) +{ + struct vfio_iommu_type1_info info_cmd = {}; + + /* Invalid argsz */ + info_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd)); + + info_cmd.argsz = sizeof(info_cmd); + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd)); +} + +TEST_F(iommufd, set_iommu_cmd) +{ + /* Requires a domain to be attached */ + EXPECT_ERRNO(ENODEV, + ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU)); + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)); +} + +TEST_F(iommufd, vfio_ioas) +{ + struct iommu_vfio_ioas vfio_ioas_cmd = { + .size = sizeof(vfio_ioas_cmd), + .op = IOMMU_VFIO_IOAS_GET, + }; + __u32 ioas_id; + + /* ENODEV if there is no compat ioas */ + EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Invalid id for set */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_SET; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Valid id for set*/ + test_ioctl_ioas_alloc(&ioas_id); + vfio_ioas_cmd.ioas_id = ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Same id comes back from get */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + ASSERT_EQ(ioas_id, vfio_ioas_cmd.ioas_id); + + /* Clear works */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_CLEAR; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); +} + +FIXTURE(vfio_compat_mock_domain) +{ + int fd; + uint32_t ioas_id; +}; + +FIXTURE_VARIANT(vfio_compat_mock_domain) +{ + unsigned int version; +}; + +FIXTURE_SETUP(vfio_compat_mock_domain) +{ + struct iommu_vfio_ioas vfio_ioas_cmd = { + .size = sizeof(vfio_ioas_cmd), + .op = IOMMU_VFIO_IOAS_SET, + }; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + /* Create what VFIO would consider a group */ + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + + /* Attach it to the vfio compat */ + vfio_ioas_cmd.ioas_id = self->ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + ASSERT_EQ(0, ioctl(self->fd, VFIO_SET_IOMMU, variant->version)); +} + +FIXTURE_TEARDOWN(vfio_compat_mock_domain) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v2) +{ + .version = VFIO_TYPE1v2_IOMMU, +}; + +FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v0) +{ + .version = VFIO_TYPE1_IOMMU, +}; + +TEST_F(vfio_compat_mock_domain, simple_close) +{ +} + +TEST_F(vfio_compat_mock_domain, option_huge_pages) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_GET, + .val64 = 3, + .object_id = self->ioas_id, + }; + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + if (variant->version == VFIO_TYPE1_IOMMU) { + ASSERT_EQ(0, cmd.val64); + } else { + ASSERT_EQ(1, cmd.val64); + } +} + +/* + * Execute an ioctl command stored in buffer and check that the result does not + * overflow memory. + */ +static bool is_filled(const void *buf, uint8_t c, size_t len) +{ + const uint8_t *cbuf = buf; + + for (; len; cbuf++, len--) + if (*cbuf != c) + return false; + return true; +} + +#define ioctl_check_buf(fd, cmd) \ + ({ \ + size_t _cmd_len = *(__u32 *)buffer; \ + \ + memset(buffer + _cmd_len, 0xAA, BUFFER_SIZE - _cmd_len); \ + ASSERT_EQ(0, ioctl(fd, cmd, buffer)); \ + ASSERT_EQ(true, is_filled(buffer + _cmd_len, 0xAA, \ + BUFFER_SIZE - _cmd_len)); \ + }) + +static void check_vfio_info_cap_chain(struct __test_metadata *_metadata, + struct vfio_iommu_type1_info *info_cmd) +{ + const struct vfio_info_cap_header *cap; + + ASSERT_GE(info_cmd->argsz, info_cmd->cap_offset + sizeof(*cap)); + cap = buffer + info_cmd->cap_offset; + while (true) { + size_t cap_size; + + if (cap->next) + cap_size = (buffer + cap->next) - (void *)cap; + else + cap_size = (buffer + info_cmd->argsz) - (void *)cap; + + switch (cap->id) { + case VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE: { + struct vfio_iommu_type1_info_cap_iova_range *data = + (void *)cap; + + ASSERT_EQ(1, data->header.version); + ASSERT_EQ(1, data->nr_iovas); + EXPECT_EQ(MOCK_APERTURE_START, + data->iova_ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, data->iova_ranges[0].end); + break; + } + case VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL: { + struct vfio_iommu_type1_info_dma_avail *data = + (void *)cap; + + ASSERT_EQ(1, data->header.version); + ASSERT_EQ(sizeof(*data), cap_size); + break; + } + default: + ASSERT_EQ(false, true); + break; + } + if (!cap->next) + break; + + ASSERT_GE(info_cmd->argsz, cap->next + sizeof(*cap)); + ASSERT_GE(buffer + cap->next, (void *)cap); + cap = buffer + cap->next; + } +} + +TEST_F(vfio_compat_mock_domain, get_info) +{ + struct vfio_iommu_type1_info *info_cmd = buffer; + unsigned int i; + size_t caplen; + + /* Pre-cap ABI */ + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = offsetof(struct vfio_iommu_type1_info, cap_offset), + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_NE(0, info_cmd->iova_pgsizes); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + + /* Read the cap chain size */ + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = sizeof(*info_cmd), + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_NE(0, info_cmd->iova_pgsizes); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + ASSERT_EQ(0, info_cmd->cap_offset); + ASSERT_LT(sizeof(*info_cmd), info_cmd->argsz); + + /* Read the caps, kernel should never create a corrupted caps */ + caplen = info_cmd->argsz; + for (i = sizeof(*info_cmd); i < caplen; i++) { + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = i, + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + if (!info_cmd->cap_offset) + continue; + check_vfio_info_cap_chain(_metadata, info_cmd); + } +} + +static void shuffle_array(unsigned long *array, size_t nelms) +{ + unsigned int i; + + /* Shuffle */ + for (i = 0; i != nelms; i++) { + unsigned long tmp = array[i]; + unsigned int other = rand() % (nelms - i); + + array[i] = array[other]; + array[other] = tmp; + } +} + +TEST_F(vfio_compat_mock_domain, map) +{ + struct vfio_iommu_type1_dma_map map_cmd = { + .argsz = sizeof(map_cmd), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .vaddr = (uintptr_t)buffer, + .size = BUFFER_SIZE, + .iova = MOCK_APERTURE_START, + }; + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .argsz = sizeof(unmap_cmd), + .size = BUFFER_SIZE, + .iova = MOCK_APERTURE_START, + }; + unsigned long pages_iova[BUFFER_SIZE / PAGE_SIZE]; + unsigned int i; + + /* Simple map/unmap */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + + /* UNMAP_FLAG_ALL requres 0 iova/size */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.iova = 0; + unmap_cmd.size = 0; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + + /* Small pages */ + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + map_cmd.iova = pages_iova[i] = + MOCK_APERTURE_START + i * PAGE_SIZE; + map_cmd.vaddr = (uintptr_t)buffer + i * PAGE_SIZE; + map_cmd.size = PAGE_SIZE; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + } + shuffle_array(pages_iova, ARRAY_SIZE(pages_iova)); + + unmap_cmd.flags = 0; + unmap_cmd.size = PAGE_SIZE; + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + unmap_cmd.iova = pages_iova[i]; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + } +} + +TEST_F(vfio_compat_mock_domain, huge_map) +{ + size_t buf_size = HUGEPAGE_SIZE * 2; + struct vfio_iommu_type1_dma_map map_cmd = { + .argsz = sizeof(map_cmd), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .size = buf_size, + .iova = MOCK_APERTURE_START, + }; + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .argsz = sizeof(unmap_cmd), + }; + unsigned long pages_iova[16]; + unsigned int i; + void *buf; + + /* Test huge pages and splitting */ + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + map_cmd.vaddr = (uintptr_t)buf; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova); + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) + pages_iova[i] = MOCK_APERTURE_START + (i * unmap_cmd.size); + shuffle_array(pages_iova, ARRAY_SIZE(pages_iova)); + + /* type1 mode can cut up larger mappings, type1v2 always fails */ + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + unmap_cmd.iova = pages_iova[i]; + unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova); + if (variant->version == VFIO_TYPE1_IOMMU) { + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, + &unmap_cmd)); + } else { + EXPECT_ERRNO(ENOENT, + ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, + &unmap_cmd)); + } + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c new file mode 100644 index 000000000000..9713111b820d --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + * + * These tests are "kernel integrity" tests. They are looking for kernel + * WARN/OOPS/kasn/etc splats triggered by kernel sanitizers & debugging + * features. It does not attempt to verify that the system calls are doing what + * they are supposed to do. + * + * The basic philosophy is to run a sequence of calls that will succeed and then + * sweep every failure injection point on that call chain to look for + * interesting things in error handling. + * + * This test is best run with: + * echo 1 > /proc/sys/kernel/panic_on_warn + * If something is actually going wrong. + */ +#include <fcntl.h> +#include <dirent.h> + +#define __EXPORTED_HEADERS__ +#include <linux/vfio.h> + +#include "iommufd_utils.h" + +static bool have_fault_injection; + +static int writeat(int dfd, const char *fn, const char *val) +{ + size_t val_len = strlen(val); + ssize_t res; + int fd; + + fd = openat(dfd, fn, O_WRONLY); + if (fd == -1) + return -1; + res = write(fd, val, val_len); + assert(res == val_len); + close(fd); + return 0; +} + +static __attribute__((constructor)) void setup_buffer(void) +{ + BUFFER_SIZE = 2*1024*1024; + + buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); +} + +/* + * This sets up fail_injection in a way that is useful for this test. + * It does not attempt to restore things back to how they were. + */ +static __attribute__((constructor)) void setup_fault_injection(void) +{ + DIR *debugfs = opendir("/sys/kernel/debug/"); + struct dirent *dent; + + if (!debugfs) + return; + + /* Allow any allocation call to be fault injected */ + if (writeat(dirfd(debugfs), "failslab/ignore-gfp-wait", "N")) + return; + writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-wait", "N"); + writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-highmem", "N"); + + while ((dent = readdir(debugfs))) { + char fn[300]; + + if (strncmp(dent->d_name, "fail", 4) != 0) + continue; + + /* We are looking for kernel splats, quiet down the log */ + snprintf(fn, sizeof(fn), "%s/verbose", dent->d_name); + writeat(dirfd(debugfs), fn, "0"); + } + closedir(debugfs); + have_fault_injection = true; +} + +struct fail_nth_state { + int proc_fd; + unsigned int iteration; +}; + +static void fail_nth_first(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state) +{ + char buf[300]; + + snprintf(buf, sizeof(buf), "/proc/self/task/%u/fail-nth", getpid()); + nth_state->proc_fd = open(buf, O_RDWR); + ASSERT_NE(-1, nth_state->proc_fd); +} + +static bool fail_nth_next(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state, + int test_result) +{ + static const char disable_nth[] = "0"; + char buf[300]; + + /* + * This is just an arbitrary limit based on the current kernel + * situation. Changes in the kernel can dramtically change the number of + * required fault injection sites, so if this hits it doesn't + * necessarily mean a test failure, just that the limit has to be made + * bigger. + */ + ASSERT_GT(400, nth_state->iteration); + if (nth_state->iteration != 0) { + ssize_t res; + ssize_t res2; + + buf[0] = 0; + /* + * Annoyingly disabling the nth can also fail. This means + * the test passed without triggering failure + */ + res = pread(nth_state->proc_fd, buf, sizeof(buf), 0); + if (res == -1 && errno == EFAULT) { + buf[0] = '1'; + buf[1] = '\n'; + res = 2; + } + + res2 = pwrite(nth_state->proc_fd, disable_nth, + ARRAY_SIZE(disable_nth) - 1, 0); + if (res2 == -1 && errno == EFAULT) { + res2 = pwrite(nth_state->proc_fd, disable_nth, + ARRAY_SIZE(disable_nth) - 1, 0); + buf[0] = '1'; + buf[1] = '\n'; + } + ASSERT_EQ(ARRAY_SIZE(disable_nth) - 1, res2); + + /* printf(" nth %u result=%d nth=%u\n", nth_state->iteration, + test_result, atoi(buf)); */ + fflush(stdout); + ASSERT_LT(1, res); + if (res != 2 || buf[0] != '0' || buf[1] != '\n') + return false; + } else { + /* printf(" nth %u result=%d\n", nth_state->iteration, + test_result); */ + } + nth_state->iteration++; + return true; +} + +/* + * This is called during the test to start failure injection. It allows the test + * to do some setup that has already been swept and thus reduce the required + * iterations. + */ +void __fail_nth_enable(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state) +{ + char buf[300]; + size_t len; + + if (!nth_state->iteration) + return; + + len = snprintf(buf, sizeof(buf), "%u", nth_state->iteration); + ASSERT_EQ(len, pwrite(nth_state->proc_fd, buf, len, 0)); +} +#define fail_nth_enable() __fail_nth_enable(_metadata, _nth_state) + +#define TEST_FAIL_NTH(fixture_name, name) \ + static int test_nth_##name(struct __test_metadata *_metadata, \ + FIXTURE_DATA(fixture_name) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + *variant, \ + struct fail_nth_state *_nth_state); \ + TEST_F(fixture_name, name) \ + { \ + struct fail_nth_state nth_state = {}; \ + int test_result = 0; \ + \ + if (!have_fault_injection) \ + SKIP(return, \ + "fault injection is not enabled in the kernel"); \ + fail_nth_first(_metadata, &nth_state); \ + ASSERT_EQ(0, test_nth_##name(_metadata, self, variant, \ + &nth_state)); \ + while (fail_nth_next(_metadata, &nth_state, test_result)) { \ + fixture_name##_teardown(_metadata, self, variant); \ + fixture_name##_setup(_metadata, self, variant); \ + test_result = test_nth_##name(_metadata, self, \ + variant, &nth_state); \ + }; \ + ASSERT_EQ(0, test_result); \ + } \ + static int test_nth_##name( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) __attribute__((unused)) \ + *variant, \ + struct fail_nth_state *_nth_state) + +FIXTURE(basic_fail_nth) +{ + int fd; + uint32_t access_id; +}; + +FIXTURE_SETUP(basic_fail_nth) +{ + self->fd = -1; + self->access_id = 0; +} + +FIXTURE_TEARDOWN(basic_fail_nth) +{ + int rc; + + if (self->access_id) { + /* The access FD holds the iommufd open until it closes */ + rc = _test_cmd_destroy_access(self->access_id); + assert(rc == 0); + } + teardown_iommufd(self->fd, _metadata); +} + +/* Cover ioas.c */ +TEST_FAIL_NTH(basic_fail_nth, basic) +{ + struct iommu_iova_range ranges[10]; + uint32_t ioas_id; + __u64 iova; + + fail_nth_enable(); + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + { + struct iommu_ioas_iova_ranges ranges_cmd = { + .size = sizeof(ranges_cmd), + .num_iovas = ARRAY_SIZE(ranges), + .ioas_id = ioas_id, + .allowed_iovas = (uintptr_t)ranges, + }; + if (ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)) + return -1; + } + + { + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + + ranges[0].start = 16*1024; + ranges[0].last = BUFFER_SIZE + 16 * 1024 * 600 - 1; + if (ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)) + return -1; + } + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + { + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE, + .dst_ioas_id = ioas_id, + .src_ioas_id = ioas_id, + .src_iova = iova, + .length = sizeof(ranges), + }; + + if (ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)) + return -1; + } + + if (_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, + NULL)) + return -1; + /* Failure path of no IOVA to unmap */ + _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, NULL); + return 0; +} + +/* iopt_area_fill_domains() and iopt_area_fill_domain() */ +TEST_FAIL_NTH(basic_fail_nth, map_domain) +{ + uint32_t ioas_id; + __u32 device_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + return 0; +} + +TEST_FAIL_NTH(basic_fail_nth, map_two_domains) +{ + uint32_t ioas_id; + __u32 device_id2; + __u32 device_id; + __u32 hwpt_id2; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id2)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id2)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2)) + return -1; + return 0; +} + +TEST_FAIL_NTH(basic_fail_nth, access_rw) +{ + uint64_t tmp_big[4096]; + uint32_t ioas_id; + uint16_t tmp[32]; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, 0)) + return -1; + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = self->access_id, + .access_rw = { .iova = iova, + .length = sizeof(tmp), + .uptr = (uintptr_t)tmp }, + }; + + // READ + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + + access_cmd.access_rw.flags = MOCK_ACCESS_RW_WRITE; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + + access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH | + MOCK_ACCESS_RW_WRITE; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + } + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = self->access_id, + .access_rw = { .iova = iova, + .flags = MOCK_ACCESS_RW_SLOW_PATH, + .length = sizeof(tmp_big), + .uptr = (uintptr_t)tmp_big }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + } + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + return 0; +} + +/* pages.c access functions */ +TEST_FAIL_NTH(basic_fail_nth, access_pin) +{ + uint32_t access_pages_id; + uint32_t ioas_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)) + return -1; + + fail_nth_enable(); + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .id = self->access_id, + .access_pages = { .iova = iova, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_pages_id = access_cmd.access_pages.out_access_pages_id; + } + + if (_test_cmd_destroy_access_pages(self->fd, self->access_id, + access_pages_id)) + return -1; + + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + return 0; +} + +/* iopt_pages_fill_xarray() */ +TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) +{ + uint32_t access_pages_id; + uint32_t ioas_id; + __u32 device_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)) + return -1; + + fail_nth_enable(); + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .id = self->access_id, + .access_pages = { .iova = iova, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_pages_id = access_cmd.access_pages.out_access_pages_id; + } + + if (_test_cmd_destroy_access_pages(self->fd, self->access_id, + access_pages_id)) + return -1; + + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + return 0; +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h new file mode 100644 index 000000000000..0d1f46369c2a --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -0,0 +1,278 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#ifndef __SELFTEST_IOMMUFD_UTILS +#define __SELFTEST_IOMMUFD_UTILS + +#include <unistd.h> +#include <stddef.h> +#include <sys/fcntl.h> +#include <sys/ioctl.h> +#include <stdint.h> +#include <assert.h> + +#include "../kselftest_harness.h" +#include "../../../../drivers/iommu/iommufd/iommufd_test.h" + +/* Hack to make assertions more readable */ +#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD + +static void *buffer; +static unsigned long BUFFER_SIZE; + +/* + * Have the kernel check the refcount on pages. I don't know why a freshly + * mmap'd anon non-compound page starts out with a ref of 3 + */ +#define check_refs(_ptr, _length, _refs) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_REFS, \ + .check_refs = { .length = _length, \ + .uptr = (uintptr_t)(_ptr), \ + .refs = _refs }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), \ + &test_cmd)); \ + }) + +static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *device_id, + __u32 *hwpt_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_MOCK_DOMAIN, + .id = ioas_id, + .mock_domain = {}, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + if (device_id) + *device_id = cmd.mock_domain.out_device_id; + assert(cmd.id != 0); + if (hwpt_id) + *hwpt_id = cmd.mock_domain.out_hwpt_id; + return 0; +} +#define test_cmd_mock_domain(ioas_id, device_id, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, device_id, \ + hwpt_id)) +#define test_err_mock_domain(_errno, ioas_id, device_id, hwpt_id) \ + EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ + device_id, hwpt_id)) + +static int _test_cmd_create_access(int fd, unsigned int ioas_id, + __u32 *access_id, unsigned int flags) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_CREATE_ACCESS, + .id = ioas_id, + .create_access = { .flags = flags }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + *access_id = cmd.create_access.out_access_fd; + return 0; +} +#define test_cmd_create_access(ioas_id, access_id, flags) \ + ASSERT_EQ(0, _test_cmd_create_access(self->fd, ioas_id, access_id, \ + flags)) + +static int _test_cmd_destroy_access(unsigned int access_id) +{ + return close(access_id); +} +#define test_cmd_destroy_access(access_id) \ + ASSERT_EQ(0, _test_cmd_destroy_access(access_id)) + +static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id, + unsigned int access_pages_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_DESTROY_ACCESS_PAGES, + .id = access_id, + .destroy_access_pages = { .access_pages_id = access_pages_id }, + }; + return ioctl(fd, IOMMU_TEST_CMD, &cmd); +} +#define test_cmd_destroy_access_pages(access_id, access_pages_id) \ + ASSERT_EQ(0, _test_cmd_destroy_access_pages(self->fd, access_id, \ + access_pages_id)) +#define test_err_destroy_access_pages(_errno, access_id, access_pages_id) \ + EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \ + self->fd, access_id, access_pages_id)) + +static int _test_ioctl_destroy(int fd, unsigned int id) +{ + struct iommu_destroy cmd = { + .size = sizeof(cmd), + .id = id, + }; + return ioctl(fd, IOMMU_DESTROY, &cmd); +} +#define test_ioctl_destroy(id) ASSERT_EQ(0, _test_ioctl_destroy(self->fd, id)) + +static int _test_ioctl_ioas_alloc(int fd, __u32 *id) +{ + struct iommu_ioas_alloc cmd = { + .size = sizeof(cmd), + }; + int ret; + + ret = ioctl(fd, IOMMU_IOAS_ALLOC, &cmd); + if (ret) + return ret; + *id = cmd.out_ioas_id; + return 0; +} +#define test_ioctl_ioas_alloc(id) \ + ({ \ + ASSERT_EQ(0, _test_ioctl_ioas_alloc(self->fd, id)); \ + ASSERT_NE(0, *(id)); \ + }) + +static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer, + size_t length, __u64 *iova, unsigned int flags) +{ + struct iommu_ioas_map cmd = { + .size = sizeof(cmd), + .flags = flags, + .ioas_id = ioas_id, + .user_va = (uintptr_t)buffer, + .length = length, + }; + int ret; + + if (flags & IOMMU_IOAS_MAP_FIXED_IOVA) + cmd.iova = *iova; + + ret = ioctl(fd, IOMMU_IOAS_MAP, &cmd); + *iova = cmd.iova; + return ret; +} +#define test_ioctl_ioas_map(buffer, length, iova_p) \ + ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \ + length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_err_ioctl_ioas_map(_errno, buffer, length, iova_p) \ + EXPECT_ERRNO(_errno, \ + _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \ + length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_id(ioas_id, buffer, length, iova_p) \ + ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, ioas_id, buffer, length, \ + iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_fixed(buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + ASSERT_EQ(0, _test_ioctl_ioas_map( \ + self->fd, self->ioas_id, buffer, length, \ + &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + +#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + EXPECT_ERRNO(_errno, \ + _test_ioctl_ioas_map( \ + self->fd, self->ioas_id, buffer, length, \ + &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + +static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova, + size_t length, uint64_t *out_len) +{ + struct iommu_ioas_unmap cmd = { + .size = sizeof(cmd), + .ioas_id = ioas_id, + .iova = iova, + .length = length, + }; + int ret; + + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &cmd); + if (out_len) + *out_len = cmd.length; + return ret; +} +#define test_ioctl_ioas_unmap(iova, length) \ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, iova, \ + length, NULL)) + +#define test_ioctl_ioas_unmap_id(ioas_id, iova, length) \ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, length, \ + NULL)) + +#define test_err_ioctl_ioas_unmap(_errno, iova, length) \ + EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \ + iova, length, NULL)) + +static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit) +{ + struct iommu_test_cmd memlimit_cmd = { + .size = sizeof(memlimit_cmd), + .op = IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, + .memory_limit = { .limit = limit }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT), + &memlimit_cmd); +} + +#define test_ioctl_set_temp_memory_limit(limit) \ + ASSERT_EQ(0, _test_ioctl_set_temp_memory_limit(self->fd, limit)) + +#define test_ioctl_set_default_memory_limit() \ + test_ioctl_set_temp_memory_limit(65536) + +static void teardown_iommufd(int fd, struct __test_metadata *_metadata) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_MD_CHECK_REFS, + .check_refs = { .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (fd == -1) + return; + + EXPECT_EQ(0, close(fd)); + + fd = open("/dev/iommu", O_RDWR); + EXPECT_NE(-1, fd); + EXPECT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), + &test_cmd)); + EXPECT_EQ(0, close(fd)); +} + +#define EXPECT_ERRNO(expected_errno, cmd) \ + ({ \ + ASSERT_EQ(-1, cmd); \ + EXPECT_EQ(expected_errno, errno); \ + }) + +#endif diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index 708cb5429633..7424a1f5babc 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -90,7 +90,7 @@ pass_libs=() pass_cnt=0 # Get all TARGETS from selftests Makefile -targets=$(egrep "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2) +targets=$(grep -E "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2) # Single test case if [ $# -eq 2 ] diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..6ce8c488d62e 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -4,6 +4,7 @@ /aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/hypercalls +/aarch64/page_fault_test /aarch64/psci_test /aarch64/vcpu_width_config /aarch64/vgic_init @@ -16,16 +17,18 @@ /x86_64/cpuid_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs -/x86_64/evmcs_test -/x86_64/emulator_error_test +/x86_64/exit_on_emulation_failure_test /x86_64/fix_hypercall_test /x86_64/get_msr_index_features /x86_64/kvm_clock_test /x86_64/kvm_pv_test /x86_64/hyperv_clock /x86_64/hyperv_cpuid +/x86_64/hyperv_evmcs /x86_64/hyperv_features +/x86_64/hyperv_ipi /x86_64/hyperv_svm_test +/x86_64/hyperv_tlb_flush /x86_64/max_vcpuid_cap_test /x86_64/mmio_warning_test /x86_64/monitor_mwait_test @@ -36,11 +39,13 @@ /x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/sev_migrate_tests +/x86_64/smaller_maxphyaddr_emulation_test /x86_64/smm_test /x86_64/state_test /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test /x86_64/svm_nested_soft_inject_test +/x86_64/svm_nested_shutdown_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/tsc_scaling_sync diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..947676983da1 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -43,16 +43,19 @@ LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c -LIBKVM += lib/perf_test_util.c +LIBKVM += lib/memstress.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c +LIBKVM += lib/userfaultfd_util.c LIBKVM_STRING += lib/string_override.c LIBKVM_x86_64 += lib/x86_64/apic.c LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/perf_test_util.c +LIBKVM_x86_64 += lib/x86_64/hyperv.c +LIBKVM_x86_64 += lib/x86_64/memstress.c LIBKVM_x86_64 += lib/x86_64/processor.c LIBKVM_x86_64 += lib/x86_64/svm.c LIBKVM_x86_64 += lib/x86_64/ucall.c @@ -80,13 +83,15 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features -TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test -TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test +TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test @@ -96,11 +101,13 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test +TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test @@ -152,10 +159,12 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq +TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 6f9c1f19c7f6..4951ac53d1f8 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "processor.h" #include "test_util.h" +#include <linux/bitfield.h> #define BAD_ID_REG_VAL 0x1badc0deul @@ -145,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); - el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT; + el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val); return el0 == ID_AA64PFR0_ELx_64BIT_ONLY; } @@ -158,12 +159,9 @@ int main(void) TEST_REQUIRE(vcpu_aarch64_only(vcpu)); - ucall_init(vm, NULL); - test_user_raz_wi(vcpu); test_user_raz_invariant(vcpu); test_guest_raz(vcpu); - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 574eb73f0e90..26556a266021 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -222,7 +222,7 @@ static void *test_vcpu_run(void *arg) /* Currently, any exit from guest is an indication of completion */ pthread_mutex_lock(&vcpu_done_map_lock); - set_bit(vcpu_idx, vcpu_done_map); + __set_bit(vcpu_idx, vcpu_done_map); pthread_mutex_unlock(&vcpu_done_map_lock); switch (get_ucall(vcpu, &uc)) { @@ -375,7 +375,6 @@ static struct kvm_vm *test_vm_create(void) for (i = 0; i < nr_vcpus; i++) vcpu_init_descriptor_tables(vcpus[i]); - ucall_init(vm, NULL); test_init_timer_irq(vm); gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); @@ -414,36 +413,21 @@ static bool parse_args(int argc, char *argv[]) while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) { switch (opt) { case 'n': - test_args.nr_vcpus = atoi(optarg); - if (test_args.nr_vcpus <= 0) { - pr_info("Positive value needed for -n\n"); - goto err; - } else if (test_args.nr_vcpus > KVM_MAX_VCPUS) { + test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg); + if (test_args.nr_vcpus > KVM_MAX_VCPUS) { pr_info("Max allowed vCPUs: %u\n", KVM_MAX_VCPUS); goto err; } break; case 'i': - test_args.nr_iter = atoi(optarg); - if (test_args.nr_iter <= 0) { - pr_info("Positive value needed for -i\n"); - goto err; - } + test_args.nr_iter = atoi_positive("Number of iterations", optarg); break; case 'p': - test_args.timer_period_ms = atoi(optarg); - if (test_args.timer_period_ms <= 0) { - pr_info("Positive value needed for -p\n"); - goto err; - } + test_args.timer_period_ms = atoi_positive("Periodicity", optarg); break; case 'm': - test_args.migration_freq_ms = atoi(optarg); - if (test_args.migration_freq_ms < 0) { - pr_info("0 or positive value needed for -m\n"); - goto err; - } + test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg); break; case 'h': default: @@ -462,9 +446,6 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - if (!parse_args(argc, argv)) exit(KSFT_SKIP); diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 947bd201435c..637be796086f 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -2,6 +2,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <linux/bitfield.h> #define MDSCR_KDE (1 << 13) #define MDSCR_MDE (1 << 15) @@ -11,17 +12,24 @@ #define DBGBCR_EXEC (0x0 << 3) #define DBGBCR_EL1 (0x1 << 1) #define DBGBCR_E (0x1 << 0) +#define DBGBCR_LBN_SHIFT 16 +#define DBGBCR_BT_SHIFT 20 +#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT) +#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT) #define DBGWCR_LEN8 (0xff << 5) #define DBGWCR_RD (0x1 << 3) #define DBGWCR_WR (0x2 << 3) #define DBGWCR_EL1 (0x1 << 1) #define DBGWCR_E (0x1 << 0) +#define DBGWCR_LBN_SHIFT 16 +#define DBGWCR_WT_SHIFT 20 +#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT) #define SPSR_D (1 << 9) #define SPSR_SS (1 << 21) -extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start; +extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx; extern unsigned char iter_ss_begin, iter_ss_end; static volatile uint64_t sw_bp_addr, hw_bp_addr; static volatile uint64_t wp_addr, wp_data_addr; @@ -29,8 +37,74 @@ static volatile uint64_t svc_addr; static volatile uint64_t ss_addr[4], ss_idx; #define PC(v) ((uint64_t)&(v)) +#define GEN_DEBUG_WRITE_REG(reg_name) \ +static void write_##reg_name(int num, uint64_t val) \ +{ \ + switch (num) { \ + case 0: \ + write_sysreg(val, reg_name##0_el1); \ + break; \ + case 1: \ + write_sysreg(val, reg_name##1_el1); \ + break; \ + case 2: \ + write_sysreg(val, reg_name##2_el1); \ + break; \ + case 3: \ + write_sysreg(val, reg_name##3_el1); \ + break; \ + case 4: \ + write_sysreg(val, reg_name##4_el1); \ + break; \ + case 5: \ + write_sysreg(val, reg_name##5_el1); \ + break; \ + case 6: \ + write_sysreg(val, reg_name##6_el1); \ + break; \ + case 7: \ + write_sysreg(val, reg_name##7_el1); \ + break; \ + case 8: \ + write_sysreg(val, reg_name##8_el1); \ + break; \ + case 9: \ + write_sysreg(val, reg_name##9_el1); \ + break; \ + case 10: \ + write_sysreg(val, reg_name##10_el1); \ + break; \ + case 11: \ + write_sysreg(val, reg_name##11_el1); \ + break; \ + case 12: \ + write_sysreg(val, reg_name##12_el1); \ + break; \ + case 13: \ + write_sysreg(val, reg_name##13_el1); \ + break; \ + case 14: \ + write_sysreg(val, reg_name##14_el1); \ + break; \ + case 15: \ + write_sysreg(val, reg_name##15_el1); \ + break; \ + default: \ + GUEST_ASSERT(0); \ + } \ +} + +/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */ +GEN_DEBUG_WRITE_REG(dbgbcr) +GEN_DEBUG_WRITE_REG(dbgbvr) +GEN_DEBUG_WRITE_REG(dbgwcr) +GEN_DEBUG_WRITE_REG(dbgwvr) + static void reset_debug_state(void) { + uint8_t brps, wrps, i; + uint64_t dfr0; + asm volatile("msr daifset, #8"); write_sysreg(0, osdlr_el1); @@ -38,11 +112,21 @@ static void reset_debug_state(void) isb(); write_sysreg(0, mdscr_el1); - /* This test only uses the first bp and wp slot. */ - write_sysreg(0, dbgbvr0_el1); - write_sysreg(0, dbgbcr0_el1); - write_sysreg(0, dbgwcr0_el1); - write_sysreg(0, dbgwvr0_el1); + write_sysreg(0, contextidr_el1); + + /* Reset all bcr/bvr/wcr/wvr registers */ + dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0); + for (i = 0; i <= brps; i++) { + write_dbgbcr(i, 0); + write_dbgbvr(i, 0); + } + wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0); + for (i = 0; i <= wrps; i++) { + write_dbgwcr(i, 0); + write_dbgwvr(i, 0); + } + isb(); } @@ -54,16 +138,10 @@ static void enable_os_lock(void) GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); } -static void install_wp(uint64_t addr) +static void enable_monitor_debug_exceptions(void) { - uint32_t wcr; uint32_t mdscr; - wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; - write_sysreg(wcr, dbgwcr0_el1); - write_sysreg(addr, dbgwvr0_el1); - isb(); - asm volatile("msr daifclr, #8"); mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; @@ -71,21 +149,76 @@ static void install_wp(uint64_t addr) isb(); } -static void install_hw_bp(uint64_t addr) +static void install_wp(uint8_t wpn, uint64_t addr) +{ + uint32_t wcr; + + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; + write_dbgwcr(wpn, wcr); + write_dbgwvr(wpn, addr); + + isb(); + + enable_monitor_debug_exceptions(); +} + +static void install_hw_bp(uint8_t bpn, uint64_t addr) { uint32_t bcr; - uint32_t mdscr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; - write_sysreg(bcr, dbgbcr0_el1); - write_sysreg(addr, dbgbvr0_el1); + write_dbgbcr(bpn, bcr); + write_dbgbvr(bpn, addr); isb(); - asm volatile("msr daifclr, #8"); + enable_monitor_debug_exceptions(); +} - mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; - write_sysreg(mdscr, mdscr_el1); +static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t wcr; + uint64_t ctx_bcr; + + /* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* Setup a linked watchpoint (linked to the context-aware breakpoint) */ + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E | + DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT); + write_dbgwcr(addr_wp, wcr); + write_dbgwvr(addr_wp, addr); + isb(); + + enable_monitor_debug_exceptions(); +} + +void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t addr_bcr, ctx_bcr; + + /* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* + * Setup a normal breakpoint for Linked Address Match, and link it + * to the context-aware breakpoint. + */ + addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_ADDR_LINK_CTX | + ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT); + write_dbgbcr(addr_bp, addr_bcr); + write_dbgbvr(addr_bp, addr); isb(); + + enable_monitor_debug_exceptions(); } static void install_ss(void) @@ -101,52 +234,42 @@ static void install_ss(void) static volatile char write_data; -static void guest_code(void) +static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) { - GUEST_SYNC(0); + uint64_t ctx = 0xabcdef; /* a random context number */ /* Software-breakpoint */ reset_debug_state(); asm volatile("sw_bp: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); - GUEST_SYNC(1); - /* Hardware-breakpoint */ reset_debug_state(); - install_hw_bp(PC(hw_bp)); + install_hw_bp(bpn, PC(hw_bp)); asm volatile("hw_bp: nop"); GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); - GUEST_SYNC(2); - /* Hardware-breakpoint + svc */ reset_debug_state(); - install_hw_bp(PC(bp_svc)); + install_hw_bp(bpn, PC(bp_svc)); asm volatile("bp_svc: svc #0"); GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); - GUEST_SYNC(3); - /* Hardware-breakpoint + software-breakpoint */ reset_debug_state(); - install_hw_bp(PC(bp_brk)); + install_hw_bp(bpn, PC(bp_brk)); asm volatile("bp_brk: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); - GUEST_SYNC(4); - /* Watchpoint */ reset_debug_state(); - install_wp(PC(write_data)); + install_wp(wpn, PC(write_data)); write_data = 'x'; GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); - GUEST_SYNC(5); - /* Single-step */ reset_debug_state(); install_ss(); @@ -160,8 +283,6 @@ static void guest_code(void) GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); - GUEST_SYNC(6); - /* OS Lock does not block software-breakpoint */ reset_debug_state(); enable_os_lock(); @@ -169,30 +290,24 @@ static void guest_code(void) asm volatile("sw_bp2: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2)); - GUEST_SYNC(7); - /* OS Lock blocking hardware-breakpoint */ reset_debug_state(); enable_os_lock(); - install_hw_bp(PC(hw_bp2)); + install_hw_bp(bpn, PC(hw_bp2)); hw_bp_addr = 0; asm volatile("hw_bp2: nop"); GUEST_ASSERT_EQ(hw_bp_addr, 0); - GUEST_SYNC(8); - /* OS Lock blocking watchpoint */ reset_debug_state(); enable_os_lock(); write_data = '\0'; wp_data_addr = 0; - install_wp(PC(write_data)); + install_wp(wpn, PC(write_data)); write_data = 'x'; GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, 0); - GUEST_SYNC(9); - /* OS Lock blocking single-step */ reset_debug_state(); enable_os_lock(); @@ -205,6 +320,27 @@ static void guest_code(void) : : : "x0"); GUEST_ASSERT_EQ(ss_addr[0], 0); + /* Linked hardware-breakpoint */ + hw_bp_addr = 0; + reset_debug_state(); + install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + asm volatile("hw_bp_ctx: nop"); + write_sysreg(0, contextidr_el1); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx)); + + /* Linked watchpoint */ + reset_debug_state(); + install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + GUEST_DONE(); } @@ -239,11 +375,6 @@ static void guest_svc_handler(struct ex_regs *regs) svc_addr = regs->pc; } -enum single_step_op { - SINGLE_STEP_ENABLE = 0, - SINGLE_STEP_DISABLE = 1, -}; - static void guest_code_ss(int test_cnt) { uint64_t i; @@ -254,11 +385,19 @@ static void guest_code_ss(int test_cnt) w_bvr = i << 2; w_wvr = i << 2; - /* Enable Single Step execution */ - GUEST_SYNC(SINGLE_STEP_ENABLE); + /* + * Enable Single Step execution. Note! This _must_ be a bare + * ucall as the ucall() path uses atomic operations to manage + * the ucall structures, and the built-in "atomics" are usually + * implemented via exclusive access instructions. The exlusive + * monitor is cleared on ERET, and so taking debug exceptions + * during a LDREX=>STREX sequence will prevent forward progress + * and hang the guest/test. + */ + GUEST_UCALL_NONE(); /* - * The userspace will veriry that the pc is as expected during + * The userspace will verify that the pc is as expected during * single step execution between iter_ss_begin and iter_ss_end. */ asm volatile("iter_ss_begin:nop\n"); @@ -268,34 +407,27 @@ static void guest_code_ss(int test_cnt) bvr = read_sysreg(dbgbvr0_el1); wvr = read_sysreg(dbgwvr0_el1); + /* Userspace disables Single Step when the end is nigh. */ asm volatile("iter_ss_end:\n"); - /* Disable Single Step execution */ - GUEST_SYNC(SINGLE_STEP_DISABLE); - GUEST_ASSERT(bvr == w_bvr); GUEST_ASSERT(wvr == w_wvr); } GUEST_DONE(); } -static int debug_version(struct kvm_vcpu *vcpu) +static int debug_version(uint64_t id_aa64dfr0) { - uint64_t id_aa64dfr0; - - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0); - return id_aa64dfr0 & 0xf; + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); } -static void test_guest_debug_exceptions(void) +static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - int stage; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -311,23 +443,19 @@ static void test_guest_debug_exceptions(void) vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_SVC64, guest_svc_handler); - for (stage = 0; stage < 11; stage++) { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Stage %d: Unexpected sync ucall, got %lx", - stage, (ulong)uc.args[1]); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } + /* Specify bpn/wpn/ctx_bpn to be tested */ + vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); + pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); } done: @@ -346,7 +474,6 @@ void test_single_step_from_userspace(int test_cnt) struct kvm_guest_debug debug = {}; vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); - ucall_init(vm, NULL); run = vcpu->run; vcpu_args_set(vcpu, 1, test_cnt); @@ -361,18 +488,12 @@ void test_single_step_from_userspace(int test_cnt) break; } - TEST_ASSERT(cmd == UCALL_SYNC, + TEST_ASSERT(cmd == UCALL_NONE, "Unexpected ucall cmd 0x%lx", cmd); - if (uc.args[1] == SINGLE_STEP_ENABLE) { - debug.control = KVM_GUESTDBG_ENABLE | - KVM_GUESTDBG_SINGLESTEP; - ss_enable = true; - } else { - debug.control = SINGLE_STEP_DISABLE; - ss_enable = false; - } - + debug.control = KVM_GUESTDBG_ENABLE | + KVM_GUESTDBG_SINGLESTEP; + ss_enable = true; vcpu_guest_debug_set(vcpu, &debug); continue; } @@ -385,6 +506,14 @@ void test_single_step_from_userspace(int test_cnt) "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); + if ((pc + 4) == (uint64_t)&iter_ss_end) { + test_pc = 0; + debug.control = KVM_GUESTDBG_ENABLE; + ss_enable = false; + vcpu_guest_debug_set(vcpu, &debug); + continue; + } + /* * If the current pc is between iter_ss_bgin and * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should @@ -400,6 +529,43 @@ void test_single_step_from_userspace(int test_cnt) kvm_vm_free(vm); } +/* + * Run debug testing using the various breakpoint#, watchpoint# and + * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration. + */ +void test_guest_debug_exceptions_all(uint64_t aa64dfr0) +{ + uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; + int b, w, c; + + /* Number of breakpoints */ + brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1; + __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); + + /* Number of watchpoints */ + wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1; + + /* Number of context aware breakpoints */ + ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1; + + pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, + brp_num, wrp_num, ctx_brp_num); + + /* Number of normal (non-context aware) breakpoints */ + normal_brp_num = brp_num - ctx_brp_num; + + /* Lowest context aware breakpoint number */ + ctx_brp_base = normal_brp_num; + + /* Run tests with all supported breakpoints/watchpoints */ + for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) { + for (b = 0; b < normal_brp_num; b++) { + for (w = 0; w < wrp_num; w++) + test_guest_debug_exceptions(b, w, c); + } + } +} + static void help(char *name) { puts(""); @@ -414,16 +580,18 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; int opt; int ss_iteration = 10000; + uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - __TEST_REQUIRE(debug_version(vcpu) >= 6, + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); while ((opt = getopt(argc, argv, "i:")) != -1) { switch (opt) { case 'i': - ss_iteration = atoi(optarg); + ss_iteration = atoi_positive("Number of iterations", optarg); break; case 'h': default: @@ -432,7 +600,7 @@ int main(int argc, char *argv[]) } } - test_guest_debug_exceptions(); + test_guest_debug_exceptions_all(aa64dfr0); test_single_step_from_userspace(ss_iteration); return 0; diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index a39da3fe4952..bef1499fb465 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -236,7 +236,6 @@ static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu) vm = vm_create_with_one_vcpu(vcpu, guest_code); - ucall_init(vm, NULL); steal_time_init(*vcpu); return vm; @@ -306,8 +305,6 @@ static void test_run(void) int main(void) { - setbuf(stdout, NULL); - test_run(); return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c new file mode 100644 index 000000000000..95d22cfb7b41 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -0,0 +1,1117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * page_fault_test.c - Test stage 2 faults. + * + * This test tries different combinations of guest accesses (e.g., write, + * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on + * hugetlbfs with a hole). It checks that the expected handling method is + * called (e.g., uffd faults with the right address and write/read flag). + */ + +#define _GNU_SOURCE +#include <linux/bitmap.h> +#include <fcntl.h> +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> +#include <asm/sysreg.h> +#include <linux/bitfield.h> +#include "guest_modes.h" +#include "userfaultfd_util.h" + +/* Guest virtual addresses that point to the test page and its PTE. */ +#define TEST_GVA 0xc0000000 +#define TEST_EXEC_GVA (TEST_GVA + 0x8) +#define TEST_PTE_GVA 0xb0000000 +#define TEST_DATA 0x0123456789ABCDEF + +static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; + +#define CMD_NONE (0) +#define CMD_SKIP_TEST (1ULL << 1) +#define CMD_HOLE_PT (1ULL << 2) +#define CMD_HOLE_DATA (1ULL << 3) +#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4) +#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5) +#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6) +#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7) +#define CMD_SET_PTE_AF (1ULL << 8) + +#define PREPARE_FN_NR 10 +#define CHECK_FN_NR 10 + +static struct event_cnt { + int mmio_exits; + int fail_vcpu_runs; + int uffd_faults; + /* uffd_faults is incremented from multiple threads. */ + pthread_mutex_t uffd_faults_mutex; +} events; + +struct test_desc { + const char *name; + uint64_t mem_mark_cmd; + /* Skip the test if any prepare function returns false */ + bool (*guest_prepare[PREPARE_FN_NR])(void); + void (*guest_test)(void); + void (*guest_test_check[CHECK_FN_NR])(void); + uffd_handler_t uffd_pt_handler; + uffd_handler_t uffd_data_handler; + void (*dabt_handler)(struct ex_regs *regs); + void (*iabt_handler)(struct ex_regs *regs); + void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); + void (*fail_vcpu_run_handler)(int ret); + uint32_t pt_memslot_flags; + uint32_t data_memslot_flags; + bool skip; + struct event_cnt expected_events; +}; + +struct test_params { + enum vm_mem_backing_src_type src_type; + struct test_desc *test_desc; +}; + +static inline void flush_tlb_page(uint64_t vaddr) +{ + uint64_t page = vaddr >> 12; + + dsb(ishst); + asm volatile("tlbi vaae1is, %0" :: "r" (page)); + dsb(ish); + isb(); +} + +static void guest_write64(void) +{ + uint64_t val; + + WRITE_ONCE(*guest_test_memory, TEST_DATA); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +/* Check the system for atomic instructions. */ +static bool guest_check_lse(void) +{ + uint64_t isar0 = read_sysreg(id_aa64isar0_el1); + uint64_t atomic; + + atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); + return atomic >= 2; +} + +static bool guest_check_dc_zva(void) +{ + uint64_t dczid = read_sysreg(dczid_el0); + uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); + + return dzp == 0; +} + +/* Compare and swap instruction. */ +static void guest_cas(void) +{ + uint64_t val; + + GUEST_ASSERT(guest_check_lse()); + asm volatile(".arch_extension lse\n" + "casal %0, %1, [%2]\n" + :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +static void guest_read64(void) +{ + uint64_t val; + + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* Address translation instruction */ +static void guest_at(void) +{ + uint64_t par; + + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); + par = read_sysreg(par_el1); + isb(); + + /* Bit 1 indicates whether the AT was successful */ + GUEST_ASSERT_EQ(par & 1, 0); +} + +/* + * The size of the block written by "dc zva" is guaranteed to be between (2 << + * 0) and (2 << 9), which is safe in our case as we need the write to happen + * for at least a word, and not more than a page. + */ +static void guest_dc_zva(void) +{ + uint16_t val; + + asm volatile("dc zva, %0" :: "r" (guest_test_memory)); + dsb(ish); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). + * And that's special because KVM must take special care with those: they + * should still count as accesses for dirty logging or user-faulting, but + * should be handled differently on mmio. + */ +static void guest_ld_preidx(void) +{ + uint64_t val; + uint64_t addr = TEST_GVA - 8; + + /* + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is + * in a gap between memslots not backing by anything. + */ + asm volatile("ldr %0, [%1, #8]!" + : "=r" (val), "+r" (addr)); + GUEST_ASSERT_EQ(val, 0); + GUEST_ASSERT_EQ(addr, TEST_GVA); +} + +static void guest_st_preidx(void) +{ + uint64_t val = TEST_DATA; + uint64_t addr = TEST_GVA - 8; + + asm volatile("str %0, [%1, #8]!" + : "+r" (val), "+r" (addr)); + + GUEST_ASSERT_EQ(addr, TEST_GVA); + val = READ_ONCE(*guest_test_memory); +} + +static bool guest_set_ha(void) +{ + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); + uint64_t hadbs, tcr; + + /* Skip if HA is not supported. */ + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); + if (hadbs == 0) + return false; + + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + write_sysreg(tcr, tcr_el1); + isb(); + + return true; +} + +static bool guest_clear_pte_af(void) +{ + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + flush_tlb_page(TEST_GVA); + + return true; +} + +static void guest_check_pte_af(void) +{ + dsb(ish); + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); +} + +static void guest_check_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_no_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); +} + +static void guest_exec(void) +{ + int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; + int ret; + + ret = code(); + GUEST_ASSERT_EQ(ret, 0x77); +} + +static bool guest_prepare(struct test_desc *test) +{ + bool (*prepare_fn)(void); + int i; + + for (i = 0; i < PREPARE_FN_NR; i++) { + prepare_fn = test->guest_prepare[i]; + if (prepare_fn && !prepare_fn()) + return false; + } + + return true; +} + +static void guest_test_check(struct test_desc *test) +{ + void (*check_fn)(void); + int i; + + for (i = 0; i < CHECK_FN_NR; i++) { + check_fn = test->guest_test_check[i]; + if (check_fn) + check_fn(); + } +} + +static void guest_code(struct test_desc *test) +{ + if (!guest_prepare(test)) + GUEST_SYNC(CMD_SKIP_TEST); + + GUEST_SYNC(test->mem_mark_cmd); + + if (test->guest_test) + test->guest_test(); + + guest_test_check(test); + GUEST_DONE(); +} + +static void no_dabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, read_sysreg(far_el1)); +} + +static void no_iabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, regs->pc); +} + +static struct uffd_args { + char *copy; + void *hva; + uint64_t paging_size; +} pt_args, data_args; + +/* Returns true to continue the test, and false if it should be skipped. */ +static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, + struct uffd_args *args, bool expect_write) +{ + uint64_t addr = msg->arg.pagefault.address; + uint64_t flags = msg->arg.pagefault.flags; + struct uffdio_copy copy; + int ret; + + TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, + "The only expected UFFD mode is MISSING"); + ASSERT_EQ(!!(flags & UFFD_PAGEFAULT_FLAG_WRITE), expect_write); + ASSERT_EQ(addr, (uint64_t)args->hva); + + pr_debug("uffd fault: addr=%p write=%d\n", + (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); + + copy.src = (uint64_t)args->copy; + copy.dst = addr; + copy.len = args->paging_size; + copy.mode = 0; + + ret = ioctl(uffd, UFFDIO_COPY, ©); + if (ret == -1) { + pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n", + addr, errno); + return ret; + } + + pthread_mutex_lock(&events.uffd_faults_mutex); + events.uffd_faults += 1; + pthread_mutex_unlock(&events.uffd_faults_mutex); + return 0; +} + +static int uffd_pt_write_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &pt_args, true); +} + +static int uffd_data_write_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args, true); +} + +static int uffd_data_read_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args, false); +} + +static void setup_uffd_args(struct userspace_mem_region *region, + struct uffd_args *args) +{ + args->hva = (void *)region->region.userspace_addr; + args->paging_size = region->region.memory_size; + + args->copy = malloc(args->paging_size); + TEST_ASSERT(args->copy, "Failed to allocate data copy."); + memcpy(args->copy, args->hva, args->paging_size); +} + +static void setup_uffd(struct kvm_vm *vm, struct test_params *p, + struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd) +{ + struct test_desc *test = p->test_desc; + int uffd_mode = UFFDIO_REGISTER_MODE_MISSING; + + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args); + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args); + + *pt_uffd = NULL; + if (test->uffd_pt_handler) + *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, + pt_args.hva, + pt_args.paging_size, + test->uffd_pt_handler); + + *data_uffd = NULL; + if (test->uffd_data_handler) + *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, + data_args.hva, + data_args.paging_size, + test->uffd_data_handler); +} + +static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, + struct uffd_desc *data_uffd) +{ + if (test->uffd_pt_handler) + uffd_stop_demand_paging(pt_uffd); + if (test->uffd_data_handler) + uffd_stop_demand_paging(data_uffd); + + free(pt_args.copy); + free(data_args.copy); +} + +static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg) +{ + TEST_FAIL("There was no UFFD fault expected."); + return -1; +} + +/* Returns false if the test should be skipped. */ +static bool punch_hole_in_backing_store(struct kvm_vm *vm, + struct userspace_mem_region *region) +{ + void *hva = (void *)region->region.userspace_addr; + uint64_t paging_size = region->region.memory_size; + int ret, fd = region->fd; + + if (fd != -1) { + ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + 0, paging_size); + TEST_ASSERT(ret == 0, "fallocate failed\n"); + } else { + ret = madvise(hva, paging_size, MADV_DONTNEED); + TEST_ASSERT(ret == 0, "madvise failed\n"); + } + + return true; +} + +static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + + memcpy(hva, run->mmio.data, run->mmio.len); + events.mmio_exits += 1; +} + +static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + uint64_t data; + + memcpy(&data, run->mmio.data, sizeof(data)); + pr_debug("addr=%lld len=%d w=%d data=%lx\n", + run->mmio.phys_addr, run->mmio.len, + run->mmio.is_write, data); + TEST_FAIL("There was no MMIO exit expected."); +} + +static bool check_write_in_dirty_log(struct kvm_vm *vm, + struct userspace_mem_region *region, + uint64_t host_pg_nr) +{ + unsigned long *bmap; + bool first_page_dirty; + uint64_t size = region->region.memory_size; + + /* getpage_size() is not always equal to vm->page_size */ + bmap = bitmap_zalloc(size / getpagesize()); + kvm_vm_get_dirty_log(vm, region->region.slot, bmap); + first_page_dirty = test_bit(host_pg_nr, bmap); + free(bmap); + return first_page_dirty; +} + +/* Returns true to continue the test, and false if it should be skipped. */ +static bool handle_cmd(struct kvm_vm *vm, int cmd) +{ + struct userspace_mem_region *data_region, *pt_region; + bool continue_test = true; + + data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + pt_region = vm_get_mem_region(vm, MEM_REGION_PT); + + if (cmd == CMD_SKIP_TEST) + continue_test = false; + + if (cmd & CMD_HOLE_PT) + continue_test = punch_hole_in_backing_store(vm, pt_region); + if (cmd & CMD_HOLE_DATA) + continue_test = punch_hole_in_backing_store(vm, data_region); + if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), + "Missing write in dirty log"); + if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, 0), + "Missing s1ptw write in dirty log"); + if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), + "Unexpected write in dirty log"); + if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, 0), + "Unexpected s1ptw write in dirty log"); + + return continue_test; +} + +void fail_vcpu_run_no_handler(int ret) +{ + TEST_FAIL("Unexpected vcpu run failure\n"); +} + +void fail_vcpu_run_mmio_no_syndrome_handler(int ret) +{ + TEST_ASSERT(errno == ENOSYS, + "The mmio handler should have returned not implemented."); + events.fail_vcpu_runs += 1; +} + +typedef uint32_t aarch64_insn_t; +extern aarch64_insn_t __exec_test[2]; + +noinline void __return_0x77(void) +{ + asm volatile("__exec_test: mov x0, #0x77\n" + "ret\n"); +} + +/* + * Note that this function runs on the host before the test VM starts: there's + * no need to sync the D$ and I$ caches. + */ +static void load_exec_code_for_test(struct kvm_vm *vm) +{ + uint64_t *code; + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + assert(TEST_EXEC_GVA > TEST_GVA); + code = hva + TEST_EXEC_GVA - TEST_GVA; + memcpy(code, __exec_test, sizeof(__exec_test)); +} + +static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_DABT, no_dabt_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_IABT, no_iabt_handler); +} + +static void setup_gva_maps(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + uint64_t pte_gpa; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + /* Map TEST_GVA first. This will install a new PTE. */ + virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr); + /* Then map TEST_PTE_GVA to the above PTE. */ + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); +} + +enum pf_test_memslots { + CODE_AND_DATA_MEMSLOT, + PAGE_TABLE_MEMSLOT, + TEST_DATA_MEMSLOT, +}; + +/* + * Create a memslot for code and data at pfn=0, and test-data and PT ones + * at max_gfn. + */ +static void setup_memslots(struct kvm_vm *vm, struct test_params *p) +{ + uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); + uint64_t guest_page_size = vm->page_size; + uint64_t max_gfn = vm_compute_max_gfn(vm); + /* Enough for 2M of code when using 4K guest pages. */ + uint64_t code_npages = 512; + uint64_t pt_size, data_size, data_gpa; + + /* + * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using + * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13 + * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use + * twice that just in case. + */ + pt_size = 26 * guest_page_size; + + /* memslot sizes and gpa's must be aligned to the backing page size */ + pt_size = align_up(pt_size, backing_src_pagesz); + data_size = align_up(guest_page_size, backing_src_pagesz); + data_gpa = (max_gfn * guest_page_size) - data_size; + data_gpa = align_down(data_gpa, backing_src_pagesz); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, + CODE_AND_DATA_MEMSLOT, code_npages, 0); + vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT; + vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size, + PAGE_TABLE_MEMSLOT, pt_size / guest_page_size, + p->test_desc->pt_memslot_flags); + vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT, + data_size / guest_page_size, + p->test_desc->data_memslot_flags); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void setup_ucall(struct kvm_vm *vm) +{ + struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + + ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size); +} + +static void setup_default_handlers(struct test_desc *test) +{ + if (!test->mmio_handler) + test->mmio_handler = mmio_no_handler; + + if (!test->fail_vcpu_run_handler) + test->fail_vcpu_run_handler = fail_vcpu_run_no_handler; +} + +static void check_event_counts(struct test_desc *test) +{ + ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); +} + +static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) +{ + struct test_desc *test = p->test_desc; + + pr_debug("Test: %s\n", test->name); + pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_debug("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(p->src_type)->name); +} + +static void reset_event_counts(void) +{ + memset(&events, 0, sizeof(events)); +} + +/* + * This function either succeeds, skips the test (after setting test->skip), or + * fails with a TEST_FAIL that aborts all tests. + */ +static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + struct kvm_run *run; + struct ucall uc; + int ret; + + run = vcpu->run; + + for (;;) { + ret = _vcpu_run(vcpu); + if (ret) { + test->fail_vcpu_run_handler(ret); + goto done; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + if (!handle_cmd(vm, uc.args[1])) { + test->skip = true; + goto done; + } + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + break; + case UCALL_DONE: + goto done; + case UCALL_NONE: + if (run->exit_reason == KVM_EXIT_MMIO) + test->mmio_handler(vm, run); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + pr_debug(test->skip ? "Skipped.\n" : "Done.\n"); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *p = (struct test_params *)arg; + struct test_desc *test = p->test_desc; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct uffd_desc *pt_uffd, *data_uffd; + + print_test_banner(mode, p); + + vm = ____vm_create(mode); + setup_memslots(vm, p); + kvm_vm_elf_load(vm, program_invocation_name); + setup_ucall(vm); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + setup_gva_maps(vm); + + reset_event_counts(); + + /* + * Set some code in the data memslot for the guest to execute (only + * applicable to the EXEC tests). This has to be done before + * setup_uffd() as that function copies the memslot data for the uffd + * handler. + */ + load_exec_code_for_test(vm); + setup_uffd(vm, p, &pt_uffd, &data_uffd); + setup_abort_handlers(vm, vcpu, test); + setup_default_handlers(test); + vcpu_args_set(vcpu, 1, test); + + vcpu_run_loop(vm, vcpu, test); + + kvm_vm_free(vm); + free_uffd(test, pt_uffd, data_uffd); + + /* + * Make sure we check the events after the uffd threads have exited, + * which means they updated their respective event counters. + */ + if (!test->skip) + check_event_counts(test); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-s mem-type]\n", name); + puts(""); + guest_modes_help(); + backing_src_help("-s"); + puts(""); +} + +#define SNAME(s) #s +#define SCAT2(a, b) SNAME(a ## _ ## b) +#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) +#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d)) + +#define _CHECK(_test) _CHECK_##_test +#define _PREPARE(_test) _PREPARE_##_test +#define _PREPARE_guest_read64 NULL +#define _PREPARE_guest_ld_preidx NULL +#define _PREPARE_guest_write64 NULL +#define _PREPARE_guest_st_preidx NULL +#define _PREPARE_guest_exec NULL +#define _PREPARE_guest_at NULL +#define _PREPARE_guest_dc_zva guest_check_dc_zva +#define _PREPARE_guest_cas guest_check_lse + +/* With or without access flag checks */ +#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af +#define _PREPARE_no_af NULL +#define _CHECK_with_af guest_check_pte_af +#define _CHECK_no_af NULL + +/* Performs an access and checks that no faults were triggered. */ +#define TEST_ACCESS(_access, _with_af, _mark_cmd) \ +{ \ + .name = SCAT3(_access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD(_access, _with_af, _mark_cmd, \ + _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \ +{ \ + .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = _uffd_pt_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_DIRTY_LOG(_access, _with_af, _test_check) \ +{ \ + .name = SCAT3(dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af), _test_check, \ + guest_check_s1ptw_wr_in_dirty_log}, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \ + _uffd_faults, _test_check) \ +{ \ + .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test_check = { _CHECK(_with_af), _test_check }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ +{ \ + .name = SCAT3(ro_memslot, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .guest_test = _access, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ + _test_check) \ +{ \ + .name = SCAT3(ro_memslot, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits}, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \ +{ \ + .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \ + _uffd_data_handler, _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_uffd, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits, \ + .uffd_faults = _uffd_faults }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \ + _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1, \ + .uffd_faults = _uffd_faults }, \ +} + +static struct test_desc tests[] = { + + /* Check that HW is setting the Access Flag (AF) (sanity checks). */ + TEST_ACCESS(guest_read64, with_af, CMD_NONE), + TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_cas, with_af, CMD_NONE), + TEST_ACCESS(guest_write64, with_af, CMD_NONE), + TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), + TEST_ACCESS(guest_exec, with_af, CMD_NONE), + + /* + * Punch a hole in the data backing store, and then try multiple + * accesses: reads should rturn zeroes, and writes should + * re-populate the page. Moreover, the test also check that no + * exception was generated in the guest. Note that this + * reading/writing behavior is the same as reading/writing a + * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from + * userspace. + */ + TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), + + /* + * Punch holes in the data and PT backing stores and mark them for + * userfaultfd handling. This should result in 2 faults: the access + * on the data backing store, and its respective S1 page table walk + * (S1PTW). + */ + TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* no_af should also lead to a PT write. */ + TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* Note how that cas invokes the read handler. */ + TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* + * Can't test guest_at with_af as it's IMPDEF whether the AF is set. + * The S1PTW fault should still be marked as a write. + */ + TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 1), + TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + + /* + * Try accesses when the data and PT memory regions are both + * tracked for dirty logging. + */ + TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log), + /* no_af should also lead to a PT write. */ + TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_ld_preidx, with_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log), + + /* + * Access when the data and PT memory regions are both marked for + * dirty logging and UFFD at the same time. The expected result is + * that writes should mark the dirty log and trigger a userfaultfd + * write fault. Reads/execs should result in a read userfaultfd + * fault, and nothing in the dirty log. Any S1PTW should result in + * a write in the dirty log and a userfaultfd write. + */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + /* no_af should also lead to a PT write. */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_read_handler, + 2, guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, 0, 1, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_write_handler, + 2, guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_read_handler, 2, + guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_write_handler, + 2, guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, + uffd_data_write_handler, 2, + guest_check_write_in_dirty_log), + + /* + * Try accesses when the data memory region is marked read-only + * (with KVM_MEM_READONLY). Writes with a syndrome result in an + * MMIO exit, writes with no syndrome (e.g., CAS) result in a + * failed vcpu run, and reads/execs with and without syndroms do + * not fault. + */ + TEST_RO_MEMSLOT(guest_read64, 0, 0), + TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0), + TEST_RO_MEMSLOT(guest_at, 0, 0), + TEST_RO_MEMSLOT(guest_exec, 0, 0), + TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), + + /* + * Access when both the data region is both read-only and marked + * for dirty logging at the same time. The expected result is that + * for writes there should be no write in the dirty log. The + * readonly handling is the same as if the memslot was not marked + * for dirty logging: writes with a syndrome result in an MMIO + * exit, and writes with no syndrome result in a failed vcpu run. + */ + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler, + 1, guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx, + guest_check_no_write_in_dirty_log), + + /* + * Access when the data region is both read-only and punched with + * holes tracked with userfaultfd. The expected result is the + * union of both userfaultfd and read-only behaviors. For example, + * write accesses result in a userfaultfd write fault and an MMIO + * exit. Writes with no syndrome result in a failed vcpu run and + * no userfaultfd write fault. Reads result in userfaultfd getting + * triggered. + */ + TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, + uffd_no_handler, 1), + TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, + uffd_data_write_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, + uffd_no_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, + uffd_no_handler, 1), + + { 0 } +}; + +static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type) +{ + struct test_desc *t; + + for (t = &tests[0]; t->name; t++) { + if (t->skip) + continue; + + struct test_params p = { + .src_type = src_type, + .test_desc = t, + }; + + for_each_guest_mode(run_test, &p); + } +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type; + int opt; + + setbuf(stdout, NULL); + + src_type = DEFAULT_VM_MEM_SRC; + + while ((opt = getopt(argc, argv, "hm:s:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_test_and_guest_mode(src_type); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index e0b9e81a3e09..cfa36f387948 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -79,7 +79,6 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, struct kvm_vm *vm; vm = vm_create(2); - ucall_init(vm, NULL); vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index 9c131d977a1b..eef816b80993 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -68,8 +68,6 @@ static void guest_code(void) /* we don't want to assert on run execution, hence that helper */ static int run_vcpu(struct kvm_vcpu *vcpu) { - ucall_init(vcpu->vm, NULL); - return __vcpu_run(vcpu) ? -errno : 0; } diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 17417220a083..90d854e0fcff 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -756,7 +756,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) print_args(&args); vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -818,22 +817,19 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': - nr_irqs = atoi(optarg); + nr_irqs = atoi_non_negative("Number of IRQs", optarg); if (nr_irqs > 1024 || nr_irqs % 32) help(argv[0]); break; case 'e': - eoi_split = (bool)atoi(optarg); + eoi_split = (bool)atoi_paranoid(optarg); default_args = false; break; case 'l': - level_sensitive = (bool)atoi(optarg); + level_sensitive = (bool)atoi_paranoid(optarg); default_args = false; break; case 'h': diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 76c583a07ea2..3c7defd34f56 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -44,8 +44,9 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" +#include "processor.h" /* Global variable used to synchronize all of the vCPU threads. */ static int iteration; @@ -58,9 +59,6 @@ static enum { ITERATION_MARK_IDLE, } iteration_work; -/* Set to true when vCPU threads should exit. */ -static bool done; - /* The iteration that was last completed by each vCPU. */ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; @@ -126,7 +124,7 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) } static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct perf_test_vcpu_args *vcpu_args) + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; uint64_t base_gva = vcpu_args->gva; @@ -148,7 +146,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); for (page = 0; page < pages; page++) { - uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t gva = base_gva + page * memstress_args.guest_page_size; uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); if (!pfn) { @@ -180,16 +178,21 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * access tracking but low enough as to not make the test too brittle * over time and across architectures. * - * Note that when run in nested virtualization, this check will trigger - * much more frequently because TLB size is unlimited and since no flush - * happens, much more pages are cached there and guest won't see the - * "idle" bit cleared. + * When running the guest as a nested VM, "warn" instead of asserting + * as the TLB size is effectively unlimited and the KVM doesn't + * explicitly flush the TLB when aging SPTEs. As a result, more pages + * are cached and the guest won't see the "idle" bit cleared. */ - if (still_idle < pages / 10) - printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64 - "out of %" PRIu64 "), this will affect performance results" - ".\n", + if (still_idle >= pages / 10) { +#ifdef __x86_64__ + TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), + "vCPU%d: Too many pages still idle (%lu out of %lu)", + vcpu_idx, still_idle, pages); +#endif + printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " + "this will affect performance results.\n", vcpu_idx, still_idle, pages); + } close(page_idle_fd); close(pagemap_fd); @@ -211,7 +214,7 @@ static bool spin_wait_for_next_iteration(int *current_iteration) int last_iteration = *current_iteration; do { - if (READ_ONCE(done)) + if (READ_ONCE(memstress_args.stop_vcpus)) return false; *current_iteration = READ_ONCE(iteration); @@ -220,10 +223,10 @@ static bool spin_wait_for_next_iteration(int *current_iteration) return true; } -static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; - struct kvm_vm *vm = perf_test_args.vm; + struct kvm_vm *vm = memstress_args.vm; int vcpu_idx = vcpu_args->vcpu_idx; int current_iteration = 0; @@ -279,7 +282,7 @@ static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *descripti static void access_memory(struct kvm_vm *vm, int nr_vcpus, enum access_type access, const char *description) { - perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); + memstress_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, nr_vcpus, description); } @@ -303,10 +306,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; int nr_vcpus = params->nr_vcpus; - vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, + vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); @@ -321,11 +324,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) mark_memory_idle(vm, nr_vcpus); access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory"); - /* Set done to signal the vCPU threads to exit */ - done = true; - - perf_test_join_vcpu_threads(nr_vcpus); - perf_test_destroy_vm(vm); + memstress_join_vcpu_threads(nr_vcpus); + memstress_destroy_vm(vm); } static void help(char *name) @@ -368,7 +368,7 @@ int main(int argc, char *argv[]) params.vcpu_memory_bytes = parse_size(optarg); break; case 'v': - params.nr_vcpus = atoi(optarg); + params.nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'o': overlap_memory_access = true; diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 779ae54f89c4..b0e1fc4de9e2 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -20,29 +20,19 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" +#include "userfaultfd_util.h" #ifdef __NR_userfaultfd -#ifdef PRINT_PER_PAGE_UPDATES -#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) -#endif - -#ifdef PRINT_PER_VCPU_UPDATES -#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) -#endif - static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + static size_t demand_paging_size; static char *guest_data_prototype; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -67,9 +57,11 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) ts_diff.tv_sec, ts_diff.tv_nsec); } -static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) +static int handle_uffd_page_request(int uffd_mode, int uffd, + struct uffd_msg *msg) { pid_t tid = syscall(__NR_gettid); + uint64_t addr = msg->arg.pagefault.address; struct timespec start; struct timespec ts_diff; int r; @@ -116,176 +108,34 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) return 0; } -bool quit_uffd_thread; - -struct uffd_handler_args { +struct test_params { int uffd_mode; - int uffd; - int pipefd; - useconds_t delay; + useconds_t uffd_delay; + enum vm_mem_backing_src_type src_type; + bool partition_vcpu_memory_access; }; -static void *uffd_handler_thread_fn(void *arg) -{ - struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; - int uffd = uffd_args->uffd; - int pipefd = uffd_args->pipefd; - useconds_t delay = uffd_args->delay; - int64_t pages = 0; - struct timespec start; - struct timespec ts_diff; - - clock_gettime(CLOCK_MONOTONIC, &start); - while (!quit_uffd_thread) { - struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; - int r; - uint64_t addr; - - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd; - pollfd[1].events = POLLIN; - - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } - - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } - - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); - TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); - return NULL; - } - - if (!(pollfd[0].revents & POLLIN)) - continue; - - r = read(uffd, &msg, sizeof(msg)); - if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; - } - - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } - - if (!(msg.event & UFFD_EVENT_PAGEFAULT)) - continue; - - if (delay) - usleep(delay); - addr = msg.arg.pagefault.address; - r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); - if (r < 0) - return NULL; - pages++; - } - - ts_diff = timespec_elapsed(start); - PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", - pages, ts_diff.tv_sec, ts_diff.tv_nsec, - pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - - return NULL; -} - -static void setup_demand_paging(struct kvm_vm *vm, - pthread_t *uffd_handler_thread, int pipefd, - int uffd_mode, useconds_t uffd_delay, - struct uffd_handler_args *uffd_args, - void *hva, void *alias, uint64_t len) +static void prefault_mem(void *alias, uint64_t len) { - bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); - int uffd; - struct uffdio_api uffdio_api; - struct uffdio_register uffdio_register; - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - int ret; + size_t p; - PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", - is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); - - /* In order to get minor faults, prefault via the alias. */ - if (is_minor) { - size_t p; - - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; - - TEST_ASSERT(alias != NULL, "Alias required for minor faults"); - for (p = 0; p < (len / demand_paging_size); ++p) { - memcpy(alias + (p * demand_paging_size), - guest_data_prototype, demand_paging_size); - } + TEST_ASSERT(alias != NULL, "Alias required for minor faults"); + for (p = 0; p < (len / demand_paging_size); ++p) { + memcpy(alias + (p * demand_paging_size), + guest_data_prototype, demand_paging_size); } - - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd)); - - uffdio_api.api = UFFD_API; - uffdio_api.features = 0; - ret = ioctl(uffd, UFFDIO_API, &uffdio_api); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret)); - - uffdio_register.range.start = (uint64_t)hva; - uffdio_register.range.len = len; - uffdio_register.mode = uffd_mode; - ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret)); - TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == - expected_ioctls, "missing userfaultfd ioctls"); - - uffd_args->uffd_mode = uffd_mode; - uffd_args->uffd = uffd; - uffd_args->pipefd = pipefd; - uffd_args->delay = uffd_delay; - pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, - uffd_args); - - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", - hva, hva + len); } -struct test_params { - int uffd_mode; - useconds_t uffd_delay; - enum vm_mem_backing_src_type src_type; - bool partition_vcpu_memory_access; -}; - static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *uffd_handler_threads = NULL; - struct uffd_handler_args *uffd_args = NULL; + struct uffd_desc **uffd_descs = NULL; struct timespec start; struct timespec ts_diff; - int *pipefds = NULL; struct kvm_vm *vm; - int r, i; + int i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); demand_paging_size = get_backing_src_pagesz(p->src_type); @@ -296,79 +146,61 @@ static void run_test(enum vm_guest_mode mode, void *arg) memset(guest_data_prototype, 0xAB, demand_paging_size); if (p->uffd_mode) { - uffd_handler_threads = - malloc(nr_vcpus * sizeof(*uffd_handler_threads)); - TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - - uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); - TEST_ASSERT(uffd_args, "Memory allocation failed"); - - pipefds = malloc(sizeof(int) * nr_vcpus * 2); - TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); + uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); + TEST_ASSERT(uffd_descs, "Memory allocation failed"); for (i = 0; i < nr_vcpus; i++) { - struct perf_test_vcpu_args *vcpu_args; + struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; void *vcpu_alias; - vcpu_args = &perf_test_args.vcpu_args[i]; + vcpu_args = &memstress_args.vcpu_args[i]; /* Cache the host addresses of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); + prefault_mem(vcpu_alias, + vcpu_args->pages * memstress_args.guest_page_size); + /* * Set up user fault fd to handle demand paging * requests. */ - r = pipe2(&pipefds[i * 2], - O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!r, "Failed to set up pipefd"); - - setup_demand_paging(vm, &uffd_handler_threads[i], - pipefds[i * 2], p->uffd_mode, - p->uffd_delay, &uffd_args[i], - vcpu_hva, vcpu_alias, - vcpu_args->pages * perf_test_args.guest_page_size); + uffd_descs[i] = uffd_setup_demand_paging( + p->uffd_mode, p->uffd_delay, vcpu_hva, + vcpu_args->pages * memstress_args.guest_page_size, + &handle_uffd_page_request); } } pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); ts_diff = timespec_elapsed(start); pr_info("All vCPU threads joined\n"); if (p->uffd_mode) { - char c; - /* Tell the user fault fd handler threads to quit */ - for (i = 0; i < nr_vcpus; i++) { - r = write(pipefds[i * 2 + 1], &c, 1); - TEST_ASSERT(r == 1, "Unable to write to pipefd"); - - pthread_join(uffd_handler_threads[i], NULL); - } + for (i = 0; i < nr_vcpus; i++) + uffd_stop_demand_paging(uffd_descs[i]); } pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - perf_test_args.vcpu_args[0].pages * nr_vcpus / + memstress_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); free(guest_data_prototype); - if (p->uffd_mode) { - free(uffd_handler_threads); - free(uffd_args); - free(pipefds); - } + if (p->uffd_mode) + free(uffd_descs); } static void help(char *name) @@ -427,8 +259,8 @@ int main(int argc, char *argv[]) p.src_type = parse_backing_src_type(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'o': diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index f99e39a672d3..e9d6d1aecf89 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -16,7 +16,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" #ifdef __aarch64__ @@ -67,7 +67,7 @@ static bool host_quit; static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -128,10 +128,12 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) struct test_params { unsigned long iterations; uint64_t phys_offset; - int wr_fract; bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; + uint32_t write_percent; + uint32_t random_seed; + bool random_access; }; static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) @@ -139,7 +141,7 @@ static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0; vm_mem_region_set_flags(vm, slot, flags); @@ -161,7 +163,7 @@ static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_get_dirty_log(vm, slot, bitmaps[i]); } @@ -173,7 +175,7 @@ static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot); } @@ -221,11 +223,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec clear_dirty_log_total = (struct timespec){0}; int i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, p->slots, p->backing_src, p->partition_vcpu_memory_access); - perf_test_set_wr_fract(vm, p->wr_fract); + pr_info("Random seed: %u\n", p->random_seed); + memstress_set_random_seed(vm, p->random_seed); + memstress_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -248,7 +252,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) for (i = 0; i < nr_vcpus; i++) vcpu_last_completed_iteration[i] = -1; - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + /* + * Use 100% writes during the population phase to ensure all + * memory is actually populated and not just mapped to the zero + * page. The prevents expensive copy-on-write faults from + * occurring during the dirty memory iterations below, which + * would pollute the performance results. + */ + memstress_set_write_percent(vm, 100); + memstress_set_random_access(vm, false); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); @@ -269,6 +282,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); + memstress_set_write_percent(vm, p->write_percent); + memstress_set_random_access(vm, p->random_access); + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs @@ -329,7 +345,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) * wait for them to exit. */ host_quit = true; - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", @@ -345,16 +361,17 @@ static void run_test(enum vm_guest_mode mode, void *arg) free_bitmaps(bitmaps, p->slots); arch_cleanup_vm(vm); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) { puts(""); - printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " - "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" - "[-x memslots]\n", name); + printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] " + "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]" + "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name); puts(""); + printf(" -a: access memory randomly rather than in order.\n"); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n" @@ -373,16 +390,29 @@ static void help(char *name) printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); - printf(" -f: specify the fraction of pages which should be written to\n" - " as opposed to simply read, in the form\n" - " 1/<fraction of pages to write>.\n" - " (default: 1 i.e. all pages are written to.)\n"); printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -r: specify the starting random seed.\n"); backing_src_help("-s"); printf(" -x: Split the memory region into this number of memslots.\n" " (default: 1)\n"); + printf(" -w: specify the percentage of pages which should be written to\n" + " as an integer from 0-100 inclusive. This is probabilistic,\n" + " so -w X means each page has an X%% chance of writing\n" + " and a (100-X)%% chance of reading.\n" + " (default: 100 i.e. all pages are written to.)\n"); + printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n" + " values (target pCPU), one for each vCPU, plus an optional\n" + " entry for the main application task (specified via entry\n" + " <nr_vcpus + 1>). If used, entries must be provided for all\n" + " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n" + " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n" + " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n" + " To leave the application task unpinned, drop the final entry:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24\n\n" + " (default: no pinning)\n"); puts(""); exit(0); } @@ -390,12 +420,14 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + const char *pcpu_list = NULL; struct test_params p = { .iterations = TEST_HOST_LOOP_N, - .wr_fract = 1, .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, + .random_seed = 1, + .write_percent = 100, }; int opt; @@ -406,55 +438,73 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) { switch (opt) { + case 'a': + p.random_access = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'c': + pcpu_list = optarg; + break; case 'e': /* 'e' is for evil. */ run_vcpus_while_disabling_dirty_logging = true; + break; case 'g': dirty_log_manual_caps = 0; break; - case 'i': - p.iterations = atoi(optarg); + case 'h': + help(argv[0]); break; - case 'p': - p.phys_offset = strtoull(optarg, NULL, 0); + case 'i': + p.iterations = atoi_positive("Number of iterations", optarg); break; case 'm': guest_modes_cmdline(optarg); break; case 'n': - perf_test_args.nested = true; - break; - case 'b': - guest_percpu_mem_size = parse_size(optarg); - break; - case 'f': - p.wr_fract = atoi(optarg); - TEST_ASSERT(p.wr_fract >= 1, - "Write fraction cannot be less than one"); - break; - case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, - "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + memstress_args.nested = true; break; case 'o': p.partition_vcpu_memory_access = false; break; + case 'p': + p.phys_offset = strtoull(optarg, NULL, 0); + break; + case 'r': + p.random_seed = atoi_positive("Random seed", optarg); + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; + case 'v': + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + break; + case 'w': + p.write_percent = atoi_non_negative("Write percentage", optarg); + TEST_ASSERT(p.write_percent <= 100, + "Write percentage must be between 0 and 100"); + break; case 'x': - p.slots = atoi(optarg); + p.slots = atoi_positive("Number of slots", optarg); break; - case 'h': default: help(argv[0]); break; } } + if (pcpu_list) { + kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu, + nr_vcpus); + memstress_args.pin_vcpus = true; + } + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); pr_info("Test iterations: %"PRIu64"\n", p.iterations); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe1..936f3a8d1b83 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -24,6 +24,9 @@ #include "guest_modes.h" #include "processor.h" +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 @@ -44,20 +47,20 @@ # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) # define test_bit_le(nr, addr) \ test_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define set_bit_le(nr, addr) \ - set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define clear_bit_le(nr, addr) \ - clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define test_and_set_bit_le(nr, addr) \ - test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define test_and_clear_bit_le(nr, addr) \ - test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __set_bit_le(nr, addr) \ + __set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __clear_bit_le(nr, addr) \ + __clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __test_and_set_bit_le(nr, addr) \ + __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __test_and_clear_bit_le(nr, addr) \ + __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) #else -# define test_bit_le test_bit -# define set_bit_le set_bit -# define clear_bit_le clear_bit -# define test_and_set_bit_le test_and_set_bit -# define test_and_clear_bit_le test_and_clear_bit +# define test_bit_le test_bit +# define __set_bit_le __set_bit +# define __clear_bit_le __clear_bit +# define __test_and_set_bit_le __test_and_set_bit +# define __test_and_clear_bit_le __test_and_clear_bit #endif #define TEST_DIRTY_RING_COUNT 65536 @@ -226,13 +229,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -271,6 +276,24 @@ static bool dirty_ring_supported(void) static void dirty_ring_create_vm_done(struct kvm_vm *vm) { + uint64_t pages; + uint32_t limit; + + /* + * We rely on vcpu exit due to full dirty ring state. Adjust + * the ring buffer size to ensure we're able to reach the + * full dirty ring state. + */ + pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3; + pages = vm_adjust_num_guest_pages(vm->mode, pages); + if (vm->page_size < getpagesize()) + pages = vm_num_host_pages(vm->mode, pages); + + limit = 1 << (31 - __builtin_clz(pages)); + test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count)); + test_dirty_ring_count = min(limit, test_dirty_ring_count); + pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count); + /* * Switch to dirty ring mode after VM creation but before any * of the vcpu creation. @@ -305,7 +328,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 0x%x", cur->offset, num_pages); //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); - set_bit_le(cur->offset, bitmap); + __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); (*fetch_index)++; @@ -329,10 +352,9 @@ static void dirty_ring_continue_vcpu(void) } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { - /* We only have one vcpu */ - static uint32_t fetch_index = 0; uint32_t count = 0, cleared; bool continued_vcpu = false; @@ -349,7 +371,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), - slot, bitmap, num_pages, &fetch_index); + slot, bitmap, num_pages, + ring_buf_idx); cleared = kvm_vm_reset_dirty_ring(vcpu->vm); @@ -406,7 +429,8 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages); + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); void (*before_vcpu_join) (void); @@ -471,13 +495,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) @@ -560,7 +585,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... */ - if (test_and_clear_bit_le(page, host_bmap_track)) { + if (__test_and_clear_bit_le(page, host_bmap_track)) { host_track_next_count++; TEST_ASSERT(test_bit_le(page, bmap), "Page %"PRIu64" should have its dirty bit " @@ -568,7 +593,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_and_clear_bit_le(page, bmap)) { + if (__test_and_clear_bit_le(page, bmap)) { bool matched; host_dirty_count++; @@ -661,7 +686,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) * should report its dirtyness in the * next run */ - set_bit_le(page, host_bmap_track); + __set_bit_le(page, host_bmap_track); } } } @@ -681,9 +706,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, return vm; } -#define DIRTY_MEM_BITS 30 /* 1G */ -#define PAGE_SHIFT_4K 12 - struct test_params { unsigned long iterations; unsigned long interval; @@ -696,6 +718,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; + uint32_t ring_buf_idx = 0; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -756,8 +779,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); - ucall_init(vm, NULL); - /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); @@ -771,6 +792,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); @@ -778,7 +800,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages); + bmap, host_num_pages, + &ring_buf_idx); /* * See vcpu_sync_stop_requested definition for details on why @@ -813,7 +836,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(host_bmap_track); - ucall_uninit(vm); kvm_vm_free(vm); } @@ -823,7 +845,7 @@ static void help(char *name) printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); puts(""); - printf(" -c: specify dirty ring size, in number of entries\n"); + printf(" -c: hint to dirty ring size, in number of entries\n"); printf(" (only useful for dirty-ring test; default: %"PRIu32")\n", TEST_DIRTY_RING_COUNT); printf(" -i: specify iteration counts (default: %"PRIu64")\n", diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index a8124f9dd68a..5f977528e09c 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -38,12 +38,25 @@ * NORMAL 4 1111:1111 * NORMAL_WT 5 1011:1011 */ -#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \ - (0x04ul << (1 * 8)) | \ - (0x0cul << (2 * 8)) | \ - (0x44ul << (3 * 8)) | \ - (0xfful << (4 * 8)) | \ - (0xbbul << (5 * 8))) + +/* Linux doesn't use these memory types, so let's define them. */ +#define MAIR_ATTR_DEVICE_GRE UL(0x0c) +#define MAIR_ATTR_NORMAL_WT UL(0xbb) + +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 +#define MT_NORMAL_WT 5 + +#define DEFAULT_MAIR_EL1 \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) #define MPIDR_HWID_BITMASK (0xff00fffffful) @@ -92,11 +105,19 @@ enum { #define ESR_EC_MASK (ESR_EC_NUM - 1) #define ESR_EC_SVC64 0x15 +#define ESR_EC_IABT 0x21 +#define ESR_EC_DABT 0x25 #define ESR_EC_HW_BP_CURRENT 0x31 #define ESR_EC_SSTEP_CURRENT 0x33 #define ESR_EC_WP_CURRENT 0x35 #define ESR_EC_BRK_INS 0x3c +/* Access flag */ +#define PTE_AF (1ULL << 10) + +/* Access flag update enable/disable */ +#define TCR_EL1_HA (1ULL << 39) + void aarch64_get_supported_page_sizes(uint32_t ipa, bool *ps4k, bool *ps16k, bool *ps64k); @@ -109,6 +130,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); + static inline void cpu_relax(void) { asm volatile("yield" ::: "memory"); diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index e42a09cd24a0..fbc2a79369b8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -16,11 +16,24 @@ #include <linux/kvm.h> #include "linux/rbtree.h" +#include <asm/atomic.h> #include <sys/ioctl.h> #include "sparsebit.h" +/* + * Provide a version of static_assert() that is guaranteed to have an optional + * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h) + * #undefs and #defines static_assert() as a direct alias to _Static_assert(), + * i.e. effectively makes the message mandatory. Many KVM selftests #define + * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As + * a result, static_assert() behavior is non-deterministic and may or may not + * require a message depending on #include order. + */ +#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) + #define KVM_DEV_PATH "/dev/kvm" #define KVM_MAX_VCPUS 512 @@ -34,6 +47,7 @@ struct userspace_mem_region { struct sparsebit *unused_phy_pages; int fd; off_t offset; + enum vm_mem_backing_src_type backing_src_type; void *host_mem; void *host_alias; void *mmap_start; @@ -64,6 +78,14 @@ struct userspace_mem_regions { DECLARE_HASHTABLE(slot_hash, 9); }; +enum kvm_mem_region_type { + MEM_REGION_CODE, + MEM_REGION_DATA, + MEM_REGION_PT, + MEM_REGION_TEST_DATA, + NR_MEM_REGIONS, +}; + struct kvm_vm { int mode; unsigned long type; @@ -81,6 +103,7 @@ struct kvm_vm { struct sparsebit *vpages_mapped; bool has_irqchip; bool pgd_created; + vm_paddr_t ucall_mmio_addr; vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; @@ -92,6 +115,13 @@ struct kvm_vm { int stats_fd; struct kvm_stats_header stats_header; struct kvm_stats_desc *stats_desc; + + /* + * KVM region slots. These are the default memslots used by page + * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] + * memslot. + */ + uint32_t memslots[NR_MEM_REGIONS]; }; @@ -104,6 +134,13 @@ struct kvm_vm { struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot); +static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, + enum kvm_mem_region_type type) +{ + assert(type < NR_MEM_REGIONS); + return memslot2region(vm, vm->memslots[type]); +} + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 @@ -194,7 +231,7 @@ static inline bool kvm_has_cap(long cap) #define kvm_do_ioctl(fd, cmd, arg) \ ({ \ - static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \ + kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ ioctl(fd, cmd, arg); \ }) @@ -383,8 +420,14 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vm_populate_vaddr_bitmap(struct kvm_vm *vm); +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -646,13 +689,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages); +struct kvm_vm *____vm_create(enum vm_guest_mode mode); struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { - return ____vm_create(VM_MODE_DEFAULT, 0); + return ____vm_create(VM_MODE_DEFAULT); } static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) @@ -688,6 +731,10 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus); + unsigned long vm_compute_max_gfn(struct kvm_vm *vm); unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); @@ -718,6 +765,19 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, memcpy(&(g), _p, sizeof(g)); \ }) +/* + * Write a global value, but only in the VM's (guest's) domain. Primarily used + * for "globals" that hold per-VM values (VMs always duplicate code and global + * data into their own region of physical memory), but can be used anytime it's + * undesirable to change the host's copy of the global. + */ +#define write_guest_global(vm, g, val) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) _val = val; \ + \ + memcpy(_p, &(_val), sizeof(g)); \ +}) + void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, @@ -838,4 +898,13 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); } +/* + * Arch hook that is invoked via a constructor, i.e. before exeucting main(), + * to allow for arch-specific setup that is common to all tests, e.g. computing + * the default guest "mode". + */ +void kvm_selftest_arch_init(void); + +void kvm_arch_vm_post_create(struct kvm_vm *vm); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h new file mode 100644 index 000000000000..72e3e358ef7b --- /dev/null +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/memstress.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_MEMSTRESS_H +#define SELFTEST_KVM_MEMSTRESS_H + +#include <pthread.h> + +#include "kvm_util.h" + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + +#define MEMSTRESS_MEM_SLOT_INDEX 1 + +struct memstress_vcpu_args { + uint64_t gpa; + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + struct kvm_vcpu *vcpu; + int vcpu_idx; +}; + +struct memstress_args { + struct kvm_vm *vm; + /* The starting address and size of the guest test region. */ + uint64_t gpa; + uint64_t size; + uint64_t guest_page_size; + uint32_t random_seed; + uint32_t write_percent; + + /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ + bool nested; + /* Randomize which pages are accessed by the guest. */ + bool random_access; + /* True if all vCPUs are pinned to pCPUs */ + bool pin_vcpus; + /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */ + uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS]; + + /* Test is done, stop running vCPUs. */ + bool stop_vcpus; + + struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS]; +}; + +extern struct memstress_args memstress_args; + +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, + uint64_t vcpu_memory_bytes, int slots, + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access); +void memstress_destroy_vm(struct kvm_vm *vm); + +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); +void memstress_set_random_access(struct kvm_vm *vm, bool random_access); + +void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); +void memstress_join_vcpu_threads(int vcpus); +void memstress_guest_code(uint32_t vcpu_id); + +uint64_t memstress_nested_pages(int nr_vcpus); +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); + +#endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h deleted file mode 100644 index eaa88df0555a..000000000000 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * tools/testing/selftests/kvm/include/perf_test_util.h - * - * Copyright (C) 2020, Google LLC. - */ - -#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H -#define SELFTEST_KVM_PERF_TEST_UTIL_H - -#include <pthread.h> - -#include "kvm_util.h" - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - -#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ - -#define PERF_TEST_MEM_SLOT_INDEX 1 - -struct perf_test_vcpu_args { - uint64_t gpa; - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - struct kvm_vcpu *vcpu; - int vcpu_idx; -}; - -struct perf_test_args { - struct kvm_vm *vm; - /* The starting address and size of the guest test region. */ - uint64_t gpa; - uint64_t size; - uint64_t guest_page_size; - int wr_fract; - - /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ - bool nested; - - struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; -}; - -extern struct perf_test_args perf_test_args; - -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src, - bool partition_vcpu_memory_access); -void perf_test_destroy_vm(struct kvm_vm *vm); - -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); - -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); -void perf_test_join_vcpu_threads(int vcpus); -void perf_test_guest_code(uint32_t vcpu_id); - -uint64_t perf_test_nested_pages(int nr_vcpus); -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); - -#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index befc754ce9b3..80d6416f3012 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -77,6 +77,13 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_elapsed(struct timespec start); struct timespec timespec_div(struct timespec ts, int divisor); +struct guest_random_state { + uint32_t seed; +}; + +struct guest_random_state new_guest_random_state(uint32_t seed); +uint32_t guest_random_u32(struct guest_random_state *state); + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, @@ -152,4 +159,22 @@ static inline void *align_ptr_up(void *x, size_t size) return (void *)align_up((unsigned long)x, size); } +int atoi_paranoid(const char *num_str); + +static inline uint32_t atoi_positive(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num > 0, "%s must be greater than 0, got '%s'", name, num_str); + return num; +} + +static inline uint32_t atoi_non_negative(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num >= 0, "%s must be non-negative, got '%s'", name, num_str); + return num; +} + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index ee79d180e07e..1a6aaef5ccae 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -22,12 +22,26 @@ enum { struct ucall { uint64_t cmd; uint64_t args[UCALL_MAX_ARGS]; + + /* Host virtual address of this struct. */ + struct ucall *hva; }; -void ucall_init(struct kvm_vm *vm, void *arg); -void ucall_uninit(struct kvm_vm *vm); +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +void ucall_arch_do_ucall(vm_vaddr_t uc); +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); + void ucall(uint64_t cmd, int nargs, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); + +/* + * Perform userspace call without any associated data. This bare call avoids + * allocating a ucall struct, which can be useful if the atomic operations in + * the full ucall() are problematic and/or unwanted. Note, this will come out + * as UCALL_NONE on the backend. + */ +#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL) #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h new file mode 100644 index 000000000000..877449c34592 --- /dev/null +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KVM userfaultfd util + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include <inttypes.h> +#include <time.h> +#include <pthread.h> +#include <linux/userfaultfd.h> + +#include "test_util.h" + +typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); + +struct uffd_desc { + int uffd_mode; + int uffd; + int pipefds[2]; + useconds_t delay; + uffd_handler_t handler; + pthread_t thread; +}; + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler); + +void uffd_stop_demand_paging(struct uffd_desc *uffd); + +#ifdef PRINT_PER_PAGE_UPDATES +#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) +#endif + +#ifdef PRINT_PER_VCPU_UPDATES +#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) +#endif diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index 58db74f68af2..901caf0e0939 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -10,6 +10,7 @@ #define SELFTEST_KVM_EVMCS_H #include <stdint.h> +#include "hyperv.h" #include "vmx.h" #define u16 uint16_t @@ -20,15 +21,6 @@ extern bool enable_evmcs; -struct hv_vp_assist_page { - __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u64 current_nested_vmcs; -}; - struct hv_enlightened_vmcs { u32 revision_id; u32 abort; @@ -41,6 +33,8 @@ struct hv_enlightened_vmcs { u16 host_gs_selector; u16 host_tr_selector; + u16 padding16_1; + u64 host_ia32_pat; u64 host_ia32_efer; @@ -159,7 +153,7 @@ struct hv_enlightened_vmcs { u64 ept_pointer; u16 virtual_processor_id; - u16 padding16[3]; + u16 padding16_2[3]; u64 padding64_2[5]; u64 guest_physical_address; @@ -195,13 +189,13 @@ struct hv_enlightened_vmcs { u64 guest_rip; u32 hv_clean_fields; - u32 hv_padding_32; + u32 padding32_1; u32 hv_synthetic_controls; struct { u32 nested_flush_hypercall:1; u32 msr_bitmap:1; u32 reserved:30; - } hv_enlightenments_control; + } __packed hv_enlightenments_control; u32 hv_vp_id; u32 padding32_2; u64 hv_vm_id; @@ -222,7 +216,7 @@ struct hv_enlightened_vmcs { u64 host_ssp; u64 host_ia32_int_ssp_table_addr; u64 padding64_6; -}; +} __packed; #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) @@ -243,29 +237,15 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 -#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 extern struct hv_enlightened_vmcs *current_evmcs; -extern struct hv_vp_assist_page *current_vp_assist; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu); -static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +static inline void evmcs_enable(void) { - u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); - - current_vp_assist = vp_assist; - enable_evmcs = true; - - return 0; } static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) @@ -278,6 +258,16 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) return 0; } +static inline bool load_evmcs(struct hyperv_test_pages *hv) +{ + if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs)) + return false; + + current_evmcs->revision_id = EVMCS_VERSION; + + return true; +} + static inline int evmcs_vmptrst(uint64_t *value) { *value = current_vp_assist->current_nested_vmcs & diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index b66910702c0a..9218bb5f44bf 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -9,6 +9,8 @@ #ifndef SELFTEST_KVM_HYPERV_H #define SELFTEST_KVM_HYPERV_H +#include "processor.h" + #define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HYPERV_CPUID_INTERFACE 0x40000001 #define HYPERV_CPUID_VERSION 0x40000002 @@ -184,5 +186,106 @@ /* hypercall options */ #define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_REP_COMP_OFFSET 32 + +/* + * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' + * is set to the hypercall status (if no exception occurred). + */ +static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address, + uint64_t *hv_status) +{ + uint64_t error_code; + uint8_t vector; + + /* Note both the hypercall and the "asm safe" clobber r9-r11. */ + asm volatile("mov %[output_address], %%r8\n\t" + KVM_ASM_SAFE("vmcall") + : "=a" (*hv_status), + "+c" (control), "+d" (input_address), + KVM_ASM_SAFE_OUTPUTS(vector, error_code) + : [output_address] "r"(output_address), + "a" (-EFAULT) + : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); + return vector; +} + +/* Issue a Hyper-V hypercall and assert that it succeeded. */ +static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address) +{ + uint64_t hv_status; + uint8_t vector; + + vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); + + GUEST_ASSERT(!vector); + GUEST_ASSERT((hv_status & 0xffff) == 0); +} + +/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */ +static inline void hyperv_write_xmm_input(void *data, int n_sse_regs) +{ + int i; + + for (i = 0; i < n_sse_regs; i++) + write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i)); +} + +/* Proper HV_X64_MSR_GUEST_OS_ID value */ +#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) + +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + +/* Define virtual processor assist page structure. */ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; + __u64 current_nested_vmcs; +} __packed; + +extern struct hv_vp_assist_page *current_vp_assist; + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); + +struct hyperv_test_pages { + /* VP assist page */ + void *vp_assist_hva; + uint64_t vp_assist_gpa; + void *vp_assist; + + /* Partition assist page */ + void *partition_assist_hva; + uint64_t partition_assist_gpa; + void *partition_assist; + + /* Enlightened VMCS */ + void *enlightened_vmcs_hva; + uint64_t enlightened_vmcs_gpa; + void *enlightened_vmcs; +}; + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva); #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e8ca0d8a6a7e..b1a31de7108a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -63,16 +63,21 @@ struct kvm_x86_cpu_feature { u8 reg; u8 bit; }; -#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ -({ \ - struct kvm_x86_cpu_feature feature = { \ - .function = fn, \ - .index = idx, \ - .reg = KVM_CPUID_##gpr, \ - .bit = __bit, \ - }; \ - \ - feature; \ +#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ +({ \ + struct kvm_x86_cpu_feature feature = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .bit = __bit, \ + }; \ + \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ + feature; \ }) /* @@ -89,6 +94,8 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) #define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) #define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) +#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31) +#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6) #define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) #define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) #define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19) @@ -96,6 +103,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26) #define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0) #define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1) +#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2) #define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4) #define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7) #define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10) @@ -109,6 +117,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) +#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) #define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7) #define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20) #define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24) @@ -162,6 +171,102 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16) #define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17) +/* + * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit + * value/property as opposed to a single-bit feature. Again, pack the info + * into a 64-bit value to pass by value with no overhead. + */ +struct kvm_x86_cpu_property { + u32 function; + u8 index; + u8 reg; + u8 lo_bit; + u8 hi_bit; +}; +#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \ +({ \ + struct kvm_x86_cpu_property property = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .lo_bit = low_bit, \ + .hi_bit = high_bit, \ + }; \ + \ + kvm_static_assert(low_bit < high_bit); \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ + property; \ +}) + +#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) +#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) +#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) + +#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) +#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31) +#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15) +#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31) +#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15) +#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31) +#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15) + +#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31) + +#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) +#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) +#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) + +#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) + +/* + * Intel's architectural PMU events are bizarre. They have a "feature" bit + * that indicates the feature is _not_ supported, and a property that states + * the length of the bit mask of unsupported features. A feature is supported + * if the size of the bit mask is larger than the "unavailable" bit, and said + * bit is not set. + * + * Wrap the "unavailable" feature to simplify checking whether or not a given + * architectural event is supported. + */ +struct kvm_x86_pmu_feature { + struct kvm_x86_cpu_feature anti_feature; +}; +#define KVM_X86_PMU_FEATURE(name, __bit) \ +({ \ + struct kvm_x86_pmu_feature feature = { \ + .anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \ + }; \ + \ + feature; \ +}) + +#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5) + +static inline unsigned int x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +static inline unsigned int x86_model(unsigned int eax) +{ + return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); +} + /* Page table bitfield declarations */ #define PTE_PRESENT_MASK BIT_ULL(0) #define PTE_WRITABLE_MASK BIT_ULL(1) @@ -172,12 +277,18 @@ struct kvm_x86_cpu_feature { #define PTE_GLOBAL_MASK BIT_ULL(8) #define PTE_NX_MASK BIT_ULL(63) +#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) + #define PAGE_SHIFT 12 #define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK) -#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) -#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) +#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) +#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x)) +#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK) + +#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK) +#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT) /* General Registers in 64-Bit Mode */ struct gpr64_regs { @@ -425,82 +536,143 @@ static inline void cpuid(uint32_t function, return __cpuid(function, 0, eax, ebx, ecx, edx); } -static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +static inline uint32_t this_cpu_fms(void) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(1, &eax, &ebx, &ecx, &edx); + return eax; +} + +static inline uint32_t this_cpu_family(void) +{ + return x86_family(this_cpu_fms()); +} + +static inline uint32_t this_cpu_model(void) +{ + return x86_model(this_cpu_fms()); +} + +static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { uint32_t gprs[4]; - __cpuid(feature.function, feature.index, + __cpuid(function, index, &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]); - return gprs[feature.reg] & BIT(feature.bit); + return (gprs[reg] & GENMASK(hi, lo)) >> lo; +} + +static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +{ + return __this_cpu_has(feature.function, feature.index, + feature.reg, feature.bit, feature.bit); } -#define SET_XMM(__var, __xmm) \ - asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) +static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) +{ + return __this_cpu_has(property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); +} -static inline void set_xmm(int n, unsigned long val) +static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) { - switch (n) { + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { case 0: - SET_XMM(val, xmm0); + max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + +static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + + return nr_bits > feature.anti_feature.bit && + !this_cpu_has(feature.anti_feature); +} + +typedef u32 __attribute__((vector_size(16))) sse128_t; +#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; } +#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; }) +#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; }) + +static inline void read_sse_reg(int reg, sse128_t *data) +{ + switch (reg) { + case 0: + asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: - SET_XMM(val, xmm1); + asm("movdqa %%xmm1, %0" : "=m"(*data)); break; case 2: - SET_XMM(val, xmm2); + asm("movdqa %%xmm2, %0" : "=m"(*data)); break; case 3: - SET_XMM(val, xmm3); + asm("movdqa %%xmm3, %0" : "=m"(*data)); break; case 4: - SET_XMM(val, xmm4); + asm("movdqa %%xmm4, %0" : "=m"(*data)); break; case 5: - SET_XMM(val, xmm5); + asm("movdqa %%xmm5, %0" : "=m"(*data)); break; case 6: - SET_XMM(val, xmm6); + asm("movdqa %%xmm6, %0" : "=m"(*data)); break; case 7: - SET_XMM(val, xmm7); + asm("movdqa %%xmm7, %0" : "=m"(*data)); break; + default: + BUG(); } } -#define GET_XMM(__xmm) \ -({ \ - unsigned long __val; \ - asm volatile("movq %%"#__xmm", %0" : "=r"(__val)); \ - __val; \ -}) - -static inline unsigned long get_xmm(int n) +static inline void write_sse_reg(int reg, const sse128_t *data) { - assert(n >= 0 && n <= 7); - - switch (n) { + switch (reg) { case 0: - return GET_XMM(xmm0); + asm("movdqa %0, %%xmm0" : : "m"(*data)); + break; case 1: - return GET_XMM(xmm1); + asm("movdqa %0, %%xmm1" : : "m"(*data)); + break; case 2: - return GET_XMM(xmm2); + asm("movdqa %0, %%xmm2" : : "m"(*data)); + break; case 3: - return GET_XMM(xmm3); + asm("movdqa %0, %%xmm3" : : "m"(*data)); + break; case 4: - return GET_XMM(xmm4); + asm("movdqa %0, %%xmm4" : : "m"(*data)); + break; case 5: - return GET_XMM(xmm5); + asm("movdqa %0, %%xmm5" : : "m"(*data)); + break; case 6: - return GET_XMM(xmm6); + asm("movdqa %0, %%xmm6" : : "m"(*data)); + break; case 7: - return GET_XMM(xmm7); + asm("movdqa %0, %%xmm7" : : "m"(*data)); + break; + default: + BUG(); } - - /* never reached */ - return 0; } static inline void cpu_relax(void) @@ -508,11 +680,6 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } -#define vmmcall() \ - __asm__ __volatile__( \ - "vmmcall\n" \ - ) - #define ud2() \ __asm__ __volatile__( \ "ud2\n" \ @@ -526,23 +693,6 @@ static inline void cpu_relax(void) bool is_intel_cpu(void); bool is_amd_cpu(void); -static inline unsigned int x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_model(unsigned int eax) -{ - return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); -} - struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu); void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state); void kvm_x86_state_cleanup(struct kvm_x86_state *state); @@ -604,10 +754,27 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs); } +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +static inline uint32_t kvm_cpu_fms(void) +{ + return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; +} + +static inline uint32_t kvm_cpu_family(void) +{ + return x86_family(kvm_cpu_fms()); +} + +static inline uint32_t kvm_cpu_model(void) +{ + return x86_model(kvm_cpu_fms()); +} + bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, struct kvm_x86_cpu_feature feature); @@ -616,6 +783,42 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); } +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property); + +static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) +{ + return kvm_cpuid_property(kvm_get_supported_cpuid(), property); +} + +static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + +static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + + return nr_bits > feature.anti_feature.bit && + !kvm_cpu_has(feature.anti_feature); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + @@ -639,8 +842,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) return cpuid; } -const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index); void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); @@ -701,17 +902,6 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); } -static inline const struct kvm_cpuid_entry2 *__kvm_get_supported_cpuid_entry(uint32_t function, - uint32_t index) -{ - return get_cpuid_entry(kvm_get_supported_cpuid(), function, index); -} - -static inline const struct kvm_cpuid_entry2 *kvm_get_supported_cpuid_entry(uint32_t function) -{ - return __kvm_get_supported_cpuid_entry(function, 0); -} - uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); @@ -723,15 +913,6 @@ static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); } -static inline uint32_t kvm_get_cpuid_max_basic(void) -{ - return kvm_get_supported_cpuid_entry(0)->eax; -} - -static inline uint32_t kvm_get_cpuid_max_extended(void) -{ - return kvm_get_supported_cpuid_entry(0x80000000)->eax; -} void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); bool vm_is_unrestricted_guest(struct kvm_vm *vm); @@ -748,6 +929,19 @@ struct ex_regs { uint64_t rflags; }; +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, @@ -764,7 +958,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * for recursive faults when accessing memory in the handler. The downside to * using registers is that it restricts what registers can be used by the actual * instruction. But, selftests are 64-bit only, making register* pressure a - * minor concern. Use r9-r11 as they are volatile, i.e. don't need* to be saved + * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved * by the callee, and except for r11 are not implicit parameters to any * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V @@ -780,39 +974,52 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * * REGISTER OUTPUTS: * r9 = exception vector (non-zero) + * r10 = error code */ #define KVM_ASM_SAFE(insn) \ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ "lea 1f(%%rip), %%r10\n\t" \ "lea 2f(%%rip), %%r11\n\t" \ "1: " insn "\n\t" \ - "movb $0, %[vector]\n\t" \ - "jmp 3f\n\t" \ + "xor %%r9, %%r9\n\t" \ "2:\n\t" \ "mov %%r9b, %[vector]\n\t" \ - "3:\n\t" + "mov %%r10, %[error_code]\n\t" -#define KVM_ASM_SAFE_OUTPUTS(v) [vector] "=qm"(v) +#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec) #define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" -#define kvm_asm_safe(insn, inputs...) \ -({ \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ +#define kvm_asm_safe(insn, inputs...) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define kvm_asm_safe_ec(insn, error_code, inputs...) \ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ }) static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val) { + uint64_t error_code; uint8_t vector; uint32_t a, d; asm volatile(KVM_ASM_SAFE("rdmsr") - : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector) + : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code) : "c"(msr) : KVM_ASM_SAFE_CLOBBERS); @@ -827,10 +1034,9 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) bool kvm_is_tdp_enabled(void); -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr); -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte); +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level); +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); @@ -882,4 +1088,27 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) #define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \ XSTATE_XTILE_DATA_MASK) + +#define PFERR_PRESENT_BIT 0 +#define PFERR_WRITE_BIT 1 +#define PFERR_USER_BIT 2 +#define PFERR_RSVD_BIT 3 +#define PFERR_FETCH_BIT 4 +#define PFERR_PK_BIT 5 +#define PFERR_SGX_BIT 15 +#define PFERR_GUEST_FINAL_BIT 32 +#define PFERR_GUEST_PAGE_BIT 33 +#define PFERR_IMPLICIT_ACCESS_BIT 48 + +#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) +#define PFERR_USER_MASK BIT(PFERR_USER_BIT) +#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) +#define PFERR_PK_MASK BIT(PFERR_PK_BIT) +#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index c8343ff84f7f..4803e1056055 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -58,6 +58,27 @@ enum { INTERCEPT_RDPRU, }; +struct hv_vmcb_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) + +/* Synthetic VM-Exit */ +#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_cr; @@ -106,7 +127,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * Offset 0x3e0, 32 bytes reserved * for use by hypervisor/software. */ - u8 reserved_sw[32]; + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h index 7aee6244ab6a..044f0f872ba9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h @@ -32,6 +32,20 @@ struct svm_test_data { uint64_t msr_gpa; }; +static inline void vmmcall(void) +{ + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. + */ + __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + #define stgi() \ __asm__ __volatile__( \ "stgi\n" \ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 71b290b6469d..5f0c0a29c556 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -437,11 +437,16 @@ static inline int vmresume(void) static inline void vmcall(void) { - /* Currently, L1 destroys our GPRs during vmexits. */ - __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. + */ + __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); } static inline int vmread(uint64_t encoding, uint64_t *value) @@ -517,14 +522,6 @@ struct vmx_pages { uint64_t vmwrite_gpa; void *vmwrite; - void *vp_assist_hva; - uint64_t vp_assist_gpa; - void *vp_assist; - - void *enlightened_vmcs_hva; - uint64_t enlightened_vmcs_gpa; - void *enlightened_vmcs; - void *eptp_hva; uint64_t eptp_gpa; void *eptp; @@ -572,7 +569,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot); void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); -bool kvm_vm_has_ept(struct kvm_vm *vm); +bool kvm_cpu_has_ept(void); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index f42c6ac6d71d..b3b00be1ef82 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -289,7 +289,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); /* Export shared structure test_args to guest */ - ucall_init(vm, NULL); sync_global_to_guest(vm, test_args); ret = sem_init(&test_stage_updated, 0, 0); @@ -417,7 +416,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(ret == 0, "Error in sem_destroy"); free(vcpu_threads); - ucall_uninit(vm); kvm_vm_free(vm); } @@ -461,8 +459,8 @@ int main(int argc, char *argv[]) p.test_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 's': diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6f5551368944..5972a23b2765 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -11,6 +11,7 @@ #include "guest_modes.h" #include "kvm_util.h" #include "processor.h" +#include <linux/bitfield.h> #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 @@ -76,13 +77,15 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->pgd_created = true; } static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -133,12 +136,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */ + uint64_t attr_idx = MT_NORMAL; _virt_pg_map(vm, vaddr, paddr, attr_idx); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep; @@ -169,11 +172,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) TEST_FAIL("Page table levels must be 2, 3, or 4"); } - return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + return ptep; unmapped_gva: TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(1); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep = virt_get_pte_hva(vm, gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) @@ -318,13 +328,16 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code) { - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN); + size_t stack_size; + uint64_t stack_vaddr; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + aarch64_vcpu_setup(vcpu, init); vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); @@ -428,8 +441,8 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { - vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size); + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, MEM_REGION_DATA); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } @@ -486,24 +499,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - *ps4k = ((val >> 28) & 0xf) != 0xf; - *ps64k = ((val >> 24) & 0xf) == 0; - *ps16k = ((val >> 20) & 0xf) != 0; + *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf; + *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0; + *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0; close(vcpu_fd); close(vm_fd); close(kvm_fd); } -/* - * arm64 doesn't have a true default mode, so start by computing the - * available IPA space and page sizes early. - */ -void __attribute__((constructor)) init_guest_modes(void) -{ - guest_modes_append_default(); -} - void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res) @@ -528,3 +532,22 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7"); } + +void kvm_selftest_arch_init(void) +{ + /* + * arm64 doesn't have a true default mode, so start by computing the + * available IPA space and page sizes early. + */ + guest_modes_append_default(); +} + +void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +{ + /* + * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space + * is [0, 2^(64 - TCR_EL1.T0SZ)). + */ + sparsebit_set_num(vm->vpages_valid, 0, + (1ULL << vm->va_bits) >> vm->page_shift); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index ed237b744690..562c16dfbb00 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -6,108 +6,36 @@ */ #include "kvm_util.h" +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ static vm_vaddr_t *ucall_exit_mmio_addr; -static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) -{ - if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) - return false; - - virt_pg_map(vm, gpa, gpa); - - ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; - sync_global_to_guest(vm, ucall_exit_mmio_addr); - - return true; -} - -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { - vm_paddr_t gpa, start, end, step, offset; - unsigned int bits; - bool ret; + virt_pg_map(vm, mmio_gpa, mmio_gpa); - if (arg) { - gpa = (vm_paddr_t)arg; - ret = ucall_mmio_init(vm, gpa); - TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); - return; - } + vm->ucall_mmio_addr = mmio_gpa; - /* - * Find an address within the allowed physical and virtual address - * spaces, that does _not_ have a KVM memory region associated with - * it. Identity mapping an address like this allows the guest to - * access it, but as KVM doesn't know what to do with it, it - * will assume it's something userspace handles and exit with - * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. - * Here we start with a guess that the addresses around 5/8th - * of the allowed space are unmapped and then work both down and - * up from there in 1/16th allowed space sized steps. - * - * Note, we need to use VA-bits - 1 when calculating the allowed - * virtual address space for an identity mapping because the upper - * half of the virtual address space is the two's complement of the - * lower and won't match physical addresses. - */ - bits = vm->va_bits - 1; - bits = min(vm->pa_bits, bits); - end = 1ul << bits; - start = end * 5 / 8; - step = end / 16; - for (offset = 0; offset < end - start; offset += step) { - if (ucall_mmio_init(vm, start - offset)) - return; - if (ucall_mmio_init(vm, start + offset)) - return; - } - TEST_FAIL("Can't find a ucall mmio address"); + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa); } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { - ucall_exit_mmio_addr = 0; - sync_global_to_guest(vm, ucall_exit_mmio_addr); + WRITE_ONCE(*ucall_exit_mmio_addr, uc); } -void ucall(uint64_t cmd, int nargs, ...) -{ - struct ucall uc = {}; - va_list va; - int i; - - WRITE_ONCE(uc.cmd, cmd); - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); - va_end(va); - - WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); -} - -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_MMIO && - run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { - vm_vaddr_t gva; - - TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), "Unexpected ucall exit mmio address access"); - memcpy(&gva, run->mmio.data, sizeof(gva)); - memcpy(&ucall, addr_gva2hva(vcpu->vm, gva), sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)(*((uint64_t *)run->mmio.data)); } - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 9f54c098d9d0..820ac2d08c98 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -138,7 +138,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) offset = hdr.e_phoff + (n1 * hdr.e_phentsize); offset_rv = lseek(fd, offset, SEEK_SET); TEST_ASSERT(offset_rv == offset, - "Failed to seek to begining of program header %u,\n" + "Failed to seek to beginning of program header %u,\n" " filename: %s\n" " rv: %jd errno: %i", n1, filename, (intmax_t) offset_rv, errno); @@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart); + vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, + MEM_REGION_CODE); TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f1cb1627161f..c88c3ace16d2 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -11,6 +11,7 @@ #include "processor.h" #include <assert.h> +#include <sched.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -185,12 +186,18 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) +__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) { - struct kvm_vm *vm; + sparsebit_set_num(vm->vpages_valid, + 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + sparsebit_set_num(vm->vpages_valid, + (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, + (1ULL << (vm->va_bits - 1)) >> vm->page_shift); +} - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); +struct kvm_vm *____vm_create(enum vm_guest_mode mode) +{ + struct kvm_vm *vm; vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -276,20 +283,13 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); - sparsebit_set_num(vm->vpages_valid, - 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); - sparsebit_set_num(vm->vpages_valid, - (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, - (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + vm_vaddr_populate_bitmap(vm); /* Limit physical addresses to PA-bits. */ vm->max_gfn = vm_compute_max_gfn(vm); /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); - if (nr_pages != 0) - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - 0, 0, nr_pages, 0); return vm; } @@ -334,15 +334,32 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, { uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, nr_extra_pages); + struct userspace_mem_region *slot0; struct kvm_vm *vm; + int i; + + pr_debug("%s: mode='%s' pages='%ld'\n", __func__, + vm_guest_mode_string(mode), nr_pages); - vm = ____vm_create(mode, nr_pages); + vm = ____vm_create(mode); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + for (i = 0; i < NR_MEM_REGIONS; i++) + vm->memslots[i] = 0; kvm_vm_elf_load(vm, program_invocation_name); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif + /* + * TODO: Add proper defines to protect the library's memslots, and then + * carve out memslot1 for the ucall MMIO address. KVM treats writes to + * read-only memslots as MMIO, and creating a read-only memslot for the + * MMIO region would prevent silently clobbering the MMIO region. + */ + slot0 = memslot2region(vm, 0); + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + + kvm_arch_vm_post_create(vm); + return vm; } @@ -443,6 +460,59 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } +void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +{ + cpu_set_t mask; + int r; + + CPU_ZERO(&mask); + CPU_SET(pcpu, &mask); + r = sched_setaffinity(0, sizeof(mask), &mask); + TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); +} + +static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) +{ + uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); + + TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), + "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + return pcpu; +} + +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus) +{ + cpu_set_t allowed_mask; + char *cpu, *cpu_list; + char delim[2] = ","; + int i, r; + + cpu_list = strdup(pcpus_string); + TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); + + r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); + TEST_ASSERT(!r, "sched_getaffinity() failed"); + + cpu = strtok(cpu_list, delim); + + /* 1. Get all pcpus for vcpus. */ + for (i = 0; i < nr_vcpus; i++) { + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); + cpu = strtok(NULL, delim); + } + + /* 2. Check if the main worker needs to be pinned. */ + if (cpu) { + kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + cpu = strtok(NULL, delim); + } + + TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu); + free(cpu_list); +} + /* * Userspace Memory Region Find * @@ -586,6 +656,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, sparsebit_free(®ion->unused_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + if (region->fd >= 0) { + /* There's an extra map when using shared memory. */ + ret = munmap(region->mmap_alias, region->mmap_size); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + close(region->fd); + } free(region); } @@ -923,6 +999,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm_mem_backing_src_alias(src_type)->name); } + region->backing_src_type = src_type; region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); @@ -1151,8 +1228,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) * TEST_ASSERT failure occurs for invalid input or no area of at least * sz unallocated bytes >= vaddr_min is available. */ -static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min) { uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; @@ -1217,32 +1294,15 @@ va_found: return pgidx_start * vm->page_size; } -/* - * VM Virtual Address Allocate - * - * Input Args: - * vm - Virtual Machine - * sz - Size in bytes - * vaddr_min - Minimum starting virtual address - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least sz bytes within the virtual address space of the vm - * given by vm. The allocated bytes are mapped to a virtual address >= - * the address given by vaddr_min. Note that each allocation uses a - * a unique set of pages, with the minimum real allocation being at least - * a page. - */ -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, 0); + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type]); /* * Find an unused range of virtual page addresses of at least @@ -1256,14 +1316,37 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, - vaddr >> vm->page_shift); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } return vaddr_start; } /* + * VM Virtual Address Allocate + * + * Input Args: + * vm - Virtual Machine + * sz - Size in bytes + * vaddr_min - Minimum starting virtual address + * + * Output Args: None + * + * Return: + * Starting guest virtual address + * + * Allocates at least sz bytes within the virtual address space of the vm + * given by vm. The allocated bytes are mapped to a virtual address >= + * the address given by vaddr_min. Note that each allocation uses a + * a unique set of pages, with the minimum real allocation being at least + * a page. The allocated physical space comes from the TEST_DATA memory region. + */ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +{ + return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); +} + +/* * VM Virtual Address Allocate Pages * * Input Args: @@ -1282,6 +1365,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +{ + return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); +} + /* * VM Virtual Address Allocate Page * @@ -1330,6 +1418,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, virt_pg_map(vm, vaddr, paddr); vaddr += page_size; paddr += page_size; + + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } } @@ -1506,7 +1596,7 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) { - uint32_t page_size = vcpu->vm->page_size; + uint32_t page_size = getpagesize(); uint32_t size = vcpu->vm->dirty_ring_size; TEST_ASSERT(size > 0, "Should enable dirty ring first"); @@ -1847,7 +1937,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) { - return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); } /* @@ -2021,3 +2112,19 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, break; } } + +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ +} + +__weak void kvm_selftest_arch_init(void) +{ +} + +void __attribute((constructor)) kvm_selftest_init(void) +{ + /* Tell stdout not to buffer its content. */ + setbuf(stdout, NULL); + + kvm_selftest_arch_init(); +} diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/memstress.c index 9618b37c66f7..5f1d3173c238 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -2,13 +2,15 @@ /* * Copyright (C) 2020, Google LLC. */ +#define _GNU_SOURCE + #include <inttypes.h> #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" -struct perf_test_args perf_test_args; +struct memstress_args memstress_args; /* * Guest virtual memory offset of the testing memory slot. @@ -31,7 +33,7 @@ struct vcpu_thread { static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; /* The function run by each vCPU thread, as provided by the test. */ -static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); +static void (*vcpu_thread_fn)(struct memstress_vcpu_args *); /* Set to true once all vCPU threads are up and running. */ static bool all_vcpu_threads_running; @@ -42,14 +44,19 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; * Continuously write to the first 8 bytes of each page in the * specified region. */ -void perf_test_guest_code(uint32_t vcpu_idx) +void memstress_guest_code(uint32_t vcpu_idx) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; + struct guest_random_state rand_state; uint64_t gva; uint64_t pages; + uint64_t addr; + uint64_t page; int i; + rand_state = new_guest_random_state(args->random_seed + vcpu_idx); + gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -58,9 +65,14 @@ void perf_test_guest_code(uint32_t vcpu_idx) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * pta->guest_page_size); + if (args->random_access) + page = guest_random_u32(&rand_state) % pages; + else + page = i; + + addr = gva + (page * args->guest_page_size); - if (i % pta->wr_fract == 0) + if (guest_random_u32(&rand_state) % 100 < args->write_percent) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -70,17 +82,17 @@ void perf_test_guest_code(uint32_t vcpu_idx) } } -void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, +void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[], uint64_t vcpu_memory_bytes, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args; int i; for (i = 0; i < nr_vcpus; i++) { - vcpu_args = &pta->vcpu_args[i]; + vcpu_args = &args->vcpu_args[i]; vcpu_args->vcpu = vcpus[i]; vcpu_args->vcpu_idx = i; @@ -89,29 +101,29 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, vcpu_args->gva = guest_test_virt_mem + (i * vcpu_memory_bytes); vcpu_args->pages = vcpu_memory_bytes / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa + (i * vcpu_memory_bytes); + args->guest_page_size; + vcpu_args->gpa = args->gpa + (i * vcpu_memory_bytes); } else { vcpu_args->gva = guest_test_virt_mem; vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa; + args->guest_page_size; + vcpu_args->gpa = args->gpa; } vcpu_args_set(vcpus[i], 1, i); pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", i, vcpu_args->gpa, vcpu_args->gpa + - (vcpu_args->pages * pta->guest_page_size)); + (vcpu_args->pages * args->guest_page_size)); } } -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, uint64_t vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; + struct memstress_args *args = &memstress_args; struct kvm_vm *vm; uint64_t guest_num_pages, slot0_pages = 0; uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); @@ -121,20 +133,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); /* By default vCPUs will write to memory. */ - pta->wr_fract = 1; + args->write_percent = 100; /* * Snapshot the non-huge page size. This is used by the guest code to * access/dirty pages at the logging granularity. */ - pta->guest_page_size = vm_guest_mode_params[mode].page_size; + args->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size); + (nr_vcpus * vcpu_memory_bytes) / args->guest_page_size); TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", @@ -144,8 +156,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * If using nested, allocate extra pages for the nested page tables and * in-memory data structures. */ - if (pta->nested) - slot0_pages += perf_test_nested_pages(nr_vcpus); + if (args->nested) + slot0_pages += memstress_nested_pages(nr_vcpus); /* * Pass guest_num_pages to populate the page tables for test memory. @@ -153,9 +165,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * effect as KVM allows aliasing HVAs in meslots. */ vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, - perf_test_guest_code, vcpus); + memstress_guest_code, vcpus); - pta->vm = vm; + args->vm = vm; /* Put the test region at the top guest physical memory. */ region_end_gfn = vm->max_gfn + 1; @@ -165,8 +177,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * When running vCPUs in L2, restrict the test region to 48 bits to * avoid needing 5-level page tables to identity map L2. */ - if (pta->nested) - region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size); + if (args->nested) + region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size); #endif /* * If there should be more memory in the guest test region than there @@ -178,63 +190,72 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, " nr_vcpus: %d wss: %" PRIx64 "]\n", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); - pta->gpa = (region_end_gfn - guest_num_pages - 1) * pta->guest_page_size; - pta->gpa = align_down(pta->gpa, backing_src_pagesz); + args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; + args->gpa = align_down(args->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ - pta->gpa = align_down(pta->gpa, 1 << 20); + args->gpa = align_down(args->gpa, 1 << 20); #endif - pta->size = guest_num_pages * pta->guest_page_size; + args->size = guest_num_pages * args->guest_page_size; pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", - pta->gpa, pta->gpa + pta->size); + args->gpa, args->gpa + args->size); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; + vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, - PERF_TEST_MEM_SLOT_INDEX + i, + MEMSTRESS_MEM_SLOT_INDEX + i, region_pages, 0); } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); + virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages); - perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, + memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); - if (pta->nested) { + if (args->nested) { pr_info("Configuring vCPUs to run in L2 (nested).\n"); - perf_test_setup_nested(vm, nr_vcpus, vcpus); + memstress_setup_nested(vm, nr_vcpus, vcpus); } - ucall_init(vm, NULL); - /* Export the shared variables to the guest. */ - sync_global_to_guest(vm, perf_test_args); + sync_global_to_guest(vm, memstress_args); return vm; } -void perf_test_destroy_vm(struct kvm_vm *vm) +void memstress_destroy_vm(struct kvm_vm *vm) { - ucall_uninit(vm); kvm_vm_free(vm); } -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) { - perf_test_args.wr_fract = wr_fract; - sync_global_to_guest(vm, perf_test_args); + memstress_args.write_percent = write_percent; + sync_global_to_guest(vm, memstress_args.write_percent); } -uint64_t __weak perf_test_nested_pages(int nr_vcpus) +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) +{ + memstress_args.random_seed = random_seed; + sync_global_to_guest(vm, memstress_args.random_seed); +} + +void memstress_set_random_access(struct kvm_vm *vm, bool random_access) +{ + memstress_args.random_access = random_access; + sync_global_to_guest(vm, memstress_args.random_access); +} + +uint64_t __weak memstress_nested_pages(int nr_vcpus) { return 0; } -void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) +void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) { pr_info("%s() not support on this architecture, skipping.\n", __func__); exit(KSFT_SKIP); @@ -243,6 +264,10 @@ void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_v static void *vcpu_thread_main(void *data) { struct vcpu_thread *vcpu = data; + int vcpu_idx = vcpu->vcpu_idx; + + if (memstress_args.pin_vcpus) + kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); @@ -255,18 +280,19 @@ static void *vcpu_thread_main(void *data) while (!READ_ONCE(all_vcpu_threads_running)) ; - vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]); + vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]); return NULL; } -void perf_test_start_vcpu_threads(int nr_vcpus, - void (*vcpu_fn)(struct perf_test_vcpu_args *)) +void memstress_start_vcpu_threads(int nr_vcpus, + void (*vcpu_fn)(struct memstress_vcpu_args *)) { int i; vcpu_thread_fn = vcpu_fn; WRITE_ONCE(all_vcpu_threads_running, false); + WRITE_ONCE(memstress_args.stop_vcpus, false); for (i = 0; i < nr_vcpus; i++) { struct vcpu_thread *vcpu = &vcpu_threads[i]; @@ -285,10 +311,12 @@ void perf_test_start_vcpu_threads(int nr_vcpus, WRITE_ONCE(all_vcpu_threads_running, true); } -void perf_test_join_vcpu_threads(int nr_vcpus) +void memstress_join_vcpu_threads(int nr_vcpus) { int i; + WRITE_ONCE(memstress_args.stop_vcpus, true); + for (i = 0; i < nr_vcpus; i++) pthread_join(vcpu_threads[i].thread, NULL); } diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 604478151212..d146ca71e0c0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -55,13 +55,15 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->pgd_created = true; } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -279,15 +281,18 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code) { int r; - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + size_t stack_size; + unsigned long stack_vaddr; unsigned long current_gp = 0; struct kvm_mp_state mps; struct kvm_vcpu *vcpu; + stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 087b9740bc8f..9a3476a2dfca 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,11 +10,7 @@ #include "kvm_util.h" #include "processor.h" -void ucall_init(struct kvm_vm *vm, void *arg) -{ -} - -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } @@ -44,47 +40,22 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, return ret; } -void ucall(uint64_t cmd, int nargs, ...) +void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, KVM_RISCV_SELFTESTS_SBI_UCALL, - (vm_vaddr_t)&uc, 0, 0, 0, 0, 0); + uc, 0, 0, 0, 0, 0); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_RISCV_SBI && run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) { switch (run->riscv_sbi.function_id) { case KVM_RISCV_SELFTESTS_SBI_UCALL: - memcpy(&ucall, - addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); - - break; + return (void *)run->riscv_sbi.args[0]; case KVM_RISCV_SELFTESTS_SBI_UNEXP: vcpu_dump(stderr, vcpu, 2); TEST_ASSERT(0, "Unexpected trap taken by guest"); @@ -93,6 +64,5 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) break; } } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 89d7340d9cbd..15945121daf1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) return; paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); vm->pgd = paddr; @@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 73dc4e21190f..a7f02dc372cf 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -6,40 +6,19 @@ */ #include "kvm_util.h" -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { -} - -void ucall(uint64_t cmd, int nargs, ...) -{ - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory"); + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && @@ -47,13 +26,7 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) (run->s390_sieic.ipb >> 16) == 0x501) { int reg = run->s390_sieic.ipa & 0xf; - memcpy(&ucall, addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)run->s.regs.gprs[reg]; } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 6d23878bbfe1..5c22fa4c2825 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,6 +18,23 @@ #include "test_util.h" /* + * Random number generator that is usable from guest code. This is the + * Park-Miller LCG using standard constants. + */ + +struct guest_random_state new_guest_random_state(uint32_t seed) +{ + struct guest_random_state s = {.seed = seed}; + return s; +} + +uint32_t guest_random_u32(struct guest_random_state *state) +{ + state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + return state->seed; +} + +/* * Parses "[0-9]+[kmgt]?". */ size_t parse_size(const char *size) @@ -334,3 +351,22 @@ long get_run_delay(void) return val[1]; } + +int atoi_paranoid(const char *num_str) +{ + char *end_ptr; + long num; + + errno = 0; + num = strtol(num_str, &end_ptr, 0); + TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str); + TEST_ASSERT(num_str != end_ptr, + "strtol(\"%s\") didn't find a valid integer.", num_str); + TEST_ASSERT(*end_ptr == '\0', + "strtol(\"%s\") failed to parse trailing characters \"%s\".", + num_str, end_ptr); + TEST_ASSERT(num >= INT_MIN && num <= INT_MAX, + "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX); + + return num; +} diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c new file mode 100644 index 000000000000..0cc0971ce60e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "kvm_util.h" +#include "linux/types.h" +#include "linux/bitmap.h" +#include "linux/atomic.h" + +struct ucall_header { + DECLARE_BITMAP(in_use, KVM_MAX_VCPUS); + struct ucall ucalls[KVM_MAX_VCPUS]; +}; + +/* + * ucall_pool holds per-VM values (global data is duplicated by each VM), it + * must not be accessed from host code. + */ +static struct ucall_header *ucall_pool; + +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + struct ucall_header *hdr; + struct ucall *uc; + vm_vaddr_t vaddr; + int i; + + vaddr = __vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA); + hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); + memset(hdr, 0, sizeof(*hdr)); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + uc = &hdr->ucalls[i]; + uc->hva = uc; + } + + write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr); + + ucall_arch_init(vm, mmio_gpa); +} + +static struct ucall *ucall_alloc(void) +{ + struct ucall *uc; + int i; + + GUEST_ASSERT(ucall_pool); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (!test_and_set_bit(i, ucall_pool->in_use)) { + uc = &ucall_pool->ucalls[i]; + memset(uc->args, 0, sizeof(uc->args)); + return uc; + } + } + + GUEST_ASSERT(0); + return NULL; +} + +static void ucall_free(struct ucall *uc) +{ + /* Beware, here be pointer arithmetic. */ + clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall *uc; + va_list va; + int i; + + uc = ucall_alloc(); + + WRITE_ONCE(uc->cmd, cmd); + + nargs = min(nargs, UCALL_MAX_ARGS); + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + struct ucall ucall; + void *addr; + + if (!uc) + uc = &ucall; + + addr = ucall_arch_get_ucall(vcpu); + if (addr) { + memcpy(uc, addr, sizeof(*uc)); + vcpu_run_complete_io(vcpu); + } else { + memset(uc, 0, sizeof(*uc)); + } + + return uc->cmd; +} diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c new file mode 100644 index 000000000000..92cef20902f1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM userfaultfd util + * Adapted from demand_paging_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <poll.h> +#include <pthread.h> +#include <linux/userfaultfd.h> +#include <sys/syscall.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "memstress.h" +#include "userfaultfd_util.h" + +#ifdef __NR_userfaultfd + +static void *uffd_handler_thread_fn(void *arg) +{ + struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; + int uffd = uffd_desc->uffd; + int pipefd = uffd_desc->pipefds[0]; + useconds_t delay = uffd_desc->delay; + int64_t pages = 0; + struct timespec start; + struct timespec ts_diff; + + clock_gettime(CLOCK_MONOTONIC, &start); + while (1) { + struct uffd_msg msg; + struct pollfd pollfd[2]; + char tmp_chr; + int r; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd; + pollfd[1].events = POLLIN; + + r = poll(pollfd, 2, -1); + switch (r) { + case -1: + pr_info("poll err"); + continue; + case 0: + continue; + case 1: + break; + default: + pr_info("Polling uffd returned %d", r); + return NULL; + } + + if (pollfd[0].revents & POLLERR) { + pr_info("uffd revents has POLLERR"); + return NULL; + } + + if (pollfd[1].revents & POLLIN) { + r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(r == 1, + "Error reading pipefd in UFFD thread\n"); + return NULL; + } + + if (!(pollfd[0].revents & POLLIN)) + continue; + + r = read(uffd, &msg, sizeof(msg)); + if (r == -1) { + if (errno == EAGAIN) + continue; + pr_info("Read of uffd got errno %d\n", errno); + return NULL; + } + + if (r != sizeof(msg)) { + pr_info("Read on uffd returned unexpected size: %d bytes", r); + return NULL; + } + + if (!(msg.event & UFFD_EVENT_PAGEFAULT)) + continue; + + if (delay) + usleep(delay); + r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); + if (r < 0) + return NULL; + pages++; + } + + ts_diff = timespec_elapsed(start); + PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", + pages, ts_diff.tv_sec, ts_diff.tv_nsec, + pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + + return NULL; +} + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler) +{ + struct uffd_desc *uffd_desc; + bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); + int uffd; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + int ret; + + PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", + is_minor ? "MINOR" : "MISSING", + is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + + uffd_desc = malloc(sizeof(struct uffd_desc)); + TEST_ASSERT(uffd_desc, "malloc failed"); + + /* In order to get minor faults, prefault via the alias. */ + if (is_minor) + expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, + "ioctl UFFDIO_API failed: %" PRIu64, + (uint64_t)uffdio_api.api); + + uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.len = len; + uffdio_register.mode = uffd_mode; + TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, + "ioctl UFFDIO_REGISTER failed"); + TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == + expected_ioctls, "missing userfaultfd ioctls"); + + ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!ret, "Failed to set up pipefd"); + + uffd_desc->uffd_mode = uffd_mode; + uffd_desc->uffd = uffd; + uffd_desc->delay = delay; + uffd_desc->handler = handler; + pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, + uffd_desc); + + PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", + hva, hva + len); + + return uffd_desc; +} + +void uffd_stop_demand_paging(struct uffd_desc *uffd) +{ + char c = 0; + int ret; + + ret = write(uffd->pipefds[1], &c, 1); + TEST_ASSERT(ret == 1, "Unable to write to pipefd"); + + ret = pthread_join(uffd->thread, NULL); + TEST_ASSERT(ret == 0, "Pthread_join failed."); + + close(uffd->uffd); + + close(uffd->pipefds[1]); + close(uffd->pipefds[0]); + + free(uffd); +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c new file mode 100644 index 000000000000..efb7e7a1354d --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hyper-V specific functions. + * + * Copyright (C) 2021, Red Hat Inc. + */ +#include <stdint.h> +#include "processor.h" +#include "hyperv.h" + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva) +{ + vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); + + /* Setup of a region of guest memory for the VP Assist page. */ + hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); + hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); + + /* Setup of a region of guest memory for the partition assist page. */ + hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); + hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. */ + hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); + hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); + + *p_hv_pages_gva = hv_pages_gva; + return hv; +} + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + return 0; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c index 0f344a7c89c4..d61e623afc8c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to perf_test_util.c. + * x86_64-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. */ @@ -11,25 +11,25 @@ #include "test_util.h" #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "vmx.h" -void perf_test_l2_guest_code(uint64_t vcpu_id) +void memstress_l2_guest_code(uint64_t vcpu_id) { - perf_test_guest_code(vcpu_id); + memstress_guest_code(vcpu_id); vmcall(); } -extern char perf_test_l2_guest_entry[]; +extern char memstress_l2_guest_entry[]; __asm__( -"perf_test_l2_guest_entry:" +"memstress_l2_guest_entry:" " mov (%rsp), %rdi;" -" call perf_test_l2_guest_code;" +" call memstress_l2_guest_code;" " ud2;" ); -static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) +static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -42,14 +42,14 @@ static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; *rsp = vcpu_id; - prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp); + prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_DONE(); } -uint64_t perf_test_nested_pages(int nr_vcpus) +uint64_t memstress_nested_pages(int nr_vcpus) { /* * 513 page tables is enough to identity-map 256 TiB of L2 with 1G @@ -59,7 +59,7 @@ uint64_t perf_test_nested_pages(int nr_vcpus) return 513 + 10 * nr_vcpus; } -void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; @@ -72,12 +72,12 @@ void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) */ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); - start = align_down(perf_test_args.gpa, PG_SIZE_1G); - end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G); + start = align_down(memstress_args.gpa, PG_SIZE_1G); + end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); nested_identity_map_1g(vmx, vm, start, end - start); } -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; @@ -85,12 +85,13 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc int vcpu_id; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vmx = vcpu_alloc_vmx(vm, &vmx_gva); if (vcpu_id == 0) { - perf_test_setup_ept(vmx, vm); + memstress_setup_ept(vmx, vm); vmx0 = vmx; } else { /* Share the same EPT table across all vCPUs. */ @@ -100,11 +101,11 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc } /* - * Override the vCPU to run perf_test_l1_guest_code() which will - * bounce it into L2 before calling perf_test_guest_code(). + * Override the vCPU to run memstress_l1_guest_code() which will + * bounce it into L2 before calling memstress_guest_code(). */ vcpu_regs_get(vcpus[vcpu_id], ®s); - regs.rip = (unsigned long) perf_test_l1_guest_code; + regs.rip = (unsigned long) memstress_l1_guest_code; vcpu_regs_set(vcpus[vcpu_id], ®s); vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..c4d368d56cfe 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -131,23 +131,28 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) } } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, - int level) +static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, + uint64_t vaddr, int level) { - uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift); + uint64_t pt_gpa = PTE_GET_PA(*parent_pte); + uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", + level + 1, vaddr); + return &page_table[index]; } static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t pt_pfn, + uint64_t *parent_pte, uint64_t vaddr, uint64_t paddr, int current_level, int target_level) { - uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); + uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); if (!(*pte & PTE_PRESENT_MASK)) { *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; @@ -197,21 +202,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, - vaddr, paddr, PG_LEVEL_512G, level); + pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); if (*pml4e & PTE_LARGE_MASK) return; - pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); + pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); if (*pdpe & PTE_LARGE_MASK) return; - pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); + pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); if (*pde & PTE_LARGE_MASK) return; /* Fill in page table entry. */ - pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); @@ -241,30 +245,25 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, - struct kvm_vcpu *vcpu, - uint64_t vaddr) +static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) { - uint16_t index[4]; - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; - struct kvm_sregs sregs; - uint64_t rsvd_mask = 0; + if (*pte & PTE_LARGE_MASK) { + TEST_ASSERT(*level == PG_LEVEL_NONE || + *level == current_level, + "Unexpected hugepage at level %d\n", current_level); + *level = current_level; + } - /* Set the high bits in the reserved mask. */ - if (vm->pa_bits < 52) - rsvd_mask = GENMASK_ULL(51, vm->pa_bits); + return *level == current_level; +} - /* - * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries - * with 4-Level Paging and 5-Level Paging". - * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1, - * the XD flag (bit 63) is reserved. - */ - vcpu_sregs_get(vcpu, &sregs); - if ((sregs.efer & EFER_NX) == 0) { - rsvd_mask |= PTE_NX_MASK; - } +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level) +{ + uint64_t *pml4e, *pdpe, *pde; + + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + "Invalid PG_LEVEL_* '%d'", *level); TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -279,54 +278,26 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), "Canonical check failed. The virtual address is invalid."); - index[0] = (vaddr >> 12) & 0x1ffu; - index[1] = (vaddr >> 21) & 0x1ffu; - index[2] = (vaddr >> 30) & 0x1ffu; - index[3] = (vaddr >> 39) & 0x1ffu; - - pml4e = addr_gpa2hva(vm, vm->pgd); - TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK, - "Expected pml4e to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0, - "Unexpected reserved bits set."); - - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK, - "Expected pdpe to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK), - "Expected pdpe to map a pde not a 1-GByte page."); - TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); + pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); + if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) + return pml4e; - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK, - "Expected pde to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK), - "Expected pde to map a pte not a 2-MByte page."); - TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); + pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); + if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) + return pdpe; - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK, - "Expected pte to be present for gva: 0x%08lx", vaddr); + pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); + if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) + return pde; - return &pte[index[0]]; + return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); } -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr) +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) { - uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr); + int level = PG_LEVEL_4K; - return *(uint64_t *)pte; -} - -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte) -{ - uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr); - - *(uint64_t *)new_pte = pte; + return __vm_get_page_table_entry(vm, vaddr, &level); } void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) @@ -512,47 +483,23 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { - uint16_t index[4]; - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - index[0] = (gva >> 12) & 0x1ffu; - index[1] = (gva >> 21) & 0x1ffu; - index[2] = (gva >> 30) & 0x1ffu; - index[3] = (gva >> 39) & 0x1ffu; - - if (!vm->pgd_created) - goto unmapped_gva; - pml4e = addr_gpa2hva(vm, vm->pgd); - if (!(pml4e[index[3]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - if (!(pdpe[index[2]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - if (!(pde[index[1]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - if (!(pte[index[0]] & PTE_PRESENT_MASK)) - goto unmapped_gva; + int level = PG_LEVEL_NONE; + uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); - return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); + TEST_ASSERT(*pte & PTE_PRESENT_MASK, + "Leaf PTE not PRESENT for gva: 0x%08lx", gva); -unmapped_gva: - TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(EXIT_FAILURE); + /* + * No need for a hugepage mask on the PTE, x86-64 requires the "unused" + * address bits to be zero. + */ + return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level)); } static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) { if (!vm->gdt) - vm->gdt = vm_vaddr_alloc_page(vm); + vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); dt->base = vm->gdt; dt->limit = getpagesize(); @@ -562,7 +509,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, int selector) { if (!vm->tss) - vm->tss = vm_vaddr_alloc_page(vm); + vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); memset(segp, 0, sizeof(*segp)); segp->base = vm->tss; @@ -605,38 +552,9 @@ static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } -void __vm_xsave_require_permission(int bit, const char *name) +void kvm_arch_vm_post_create(struct kvm_vm *vm) { - int kvm_fd; - u64 bitmask; - long rc; - struct kvm_device_attr attr = { - .group = 0, - .attr = KVM_X86_XCOMP_GUEST_SUPP, - .addr = (unsigned long) &bitmask - }; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); - - kvm_fd = open_kvm_dev_path_or_exit(); - rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); - close(kvm_fd); - - if (rc == -1 && (errno == ENXIO || errno == EINVAL)) - __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); - - TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); - - __TEST_REQUIRE(bitmask & (1ULL << bit), - "Required XSAVE feature '%s' not supported", name); - - TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); - - rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); - TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & (1ULL << bit), - "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", - bitmask); + vm_create_irqchip(vm); } struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, @@ -647,8 +565,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, vm_vaddr_t stack_vaddr; struct kvm_vcpu *vcpu; - stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); @@ -683,25 +602,29 @@ void vcpu_arch_free(struct kvm_vcpu *vcpu) free(vcpu->cpuid); } +/* Do not use kvm_supported_cpuid directly except for validity checks. */ +static void *kvm_supported_cpuid; + const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { - static struct kvm_cpuid2 *cpuid; int kvm_fd; - if (cpuid) - return cpuid; + if (kvm_supported_cpuid) + return kvm_supported_cpuid; - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); kvm_fd = open_kvm_dev_path_or_exit(); - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, + (struct kvm_cpuid2 *)kvm_supported_cpuid); close(kvm_fd); - return cpuid; + return kvm_supported_cpuid; } -bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_feature feature) +static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -714,12 +637,25 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, * order, but kvm_x86_cpu_feature matches that mess, so yay * pointer shenanigans! */ - if (entry->function == feature.function && - entry->index == feature.index) - return (&entry->eax)[feature.reg] & BIT(feature.bit); + if (entry->function == function && entry->index == index) + return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo; } - return false; + return 0; +} + +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature) +{ + return __kvm_cpu_has(cpuid, feature.function, feature.index, + feature.reg, feature.bit, feature.bit); +} + +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) +{ + return __kvm_cpu_has(cpuid, property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); } uint64_t kvm_get_feature_msr(uint64_t msr_index) @@ -741,6 +677,41 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) return buffer.entry.data; } +void __vm_xsave_require_permission(int bit, const char *name) +{ + int kvm_fd; + u64 bitmask; + long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + TEST_ASSERT(!kvm_supported_cpuid, + "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM"); + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); + + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + + __TEST_REQUIRE(bitmask & (1ULL << bit), + "Required XSAVE feature '%s' not supported", name); + + TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); + TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); + TEST_ASSERT(bitmask & (1ULL << bit), + "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", + bitmask); +} + void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) { TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); @@ -1059,34 +1030,15 @@ bool is_amd_cpu(void) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) { - const struct kvm_cpuid_entry2 *entry; - bool pae; - - /* SDM 4.1.4 */ - if (kvm_get_cpuid_max_extended() < 0x80000008) { - pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); - *pa_bits = pae ? 36 : 32; + if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { + *pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; *va_bits = 32; } else { - entry = kvm_get_supported_cpuid_entry(0x80000008); - *pa_bits = entry->eax & 0xff; - *va_bits = (entry->eax >> 8) & 0xff; + *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR); } } -struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; - uint32_t offset2; uint32_t reserved; -}; - static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { @@ -1116,6 +1068,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) regs->rip = regs->r11; regs->r9 = regs->vector; + regs->r10 = regs->error_code; return true; } @@ -1145,8 +1098,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) extern void *idt_handlers; int i; - vm->idt = vm_vaddr_alloc_page(vm); - vm->handlers = vm_vaddr_alloc_page(vm); + vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, @@ -1278,7 +1231,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint32_t eax, ebx, ecx, edx, max_ext_leaf; + uint8_t maxphyaddr; max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; @@ -1292,8 +1245,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. */ ht_gfn = (1 << 28) - num_ht_pages; - cpuid(1, &eax, &ebx, &ecx, &edx); - if (x86_family(eax) < 0x17) + if (this_cpu_family() < 0x17) goto done; /* @@ -1301,17 +1253,14 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use * the old conservative value if MAXPHYADDR is not enumerated. */ - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - max_ext_leaf = eax; - if (max_ext_leaf < 0x80000008) + if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) goto done; - cpuid(0x80000008, &eax, &ebx, &ecx, &edx); - max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; - if (max_ext_leaf >= 0x8000001f) { - cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); - max_pfn >>= (ebx >> 6) & 0x3f; - } + maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1; + + if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION)) + max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION); ht_gfn = max_pfn - num_ht_pages; done: diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index e5f0f9e0d3ee..4d41dc63cc9e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -8,52 +8,25 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { -} - -void ucall(uint64_t cmd, int nargs, ...) -{ - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; vcpu_regs_get(vcpu, ®s); - memcpy(&ucall, addr_gva2hva(vcpu->vm, (vm_vaddr_t)regs.rdi), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)regs.rdi; } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index d21049c38fc5..59d97531c9b1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -109,18 +109,6 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); - /* Setup of a region of guest memory for the VP Assist page. */ - vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm); - vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist); - vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist); - - /* Setup of a region of guest memory for the enlightened VMCS. */ - vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); - vmx->enlightened_vmcs_hva = - addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs); - vmx->enlightened_vmcs_gpa = - addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs); - *p_vmx_gva = vmx_gva; return vmx; } @@ -171,26 +159,18 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) bool load_vmcs(struct vmx_pages *vmx) { - if (!enable_evmcs) { - /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); - if (vmclear(vmx->vmcs_gpa)) - return false; - - if (vmptrld(vmx->vmcs_gpa)) - return false; - - /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = - vmcs_revision() | 0x80000000ul; - if (vmclear(vmx->shadow_vmcs_gpa)) - return false; - } else { - if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa, - vmx->enlightened_vmcs)) - return false; - current_evmcs->revision_id = EVMCS_VERSION; - } + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; return true; } @@ -544,26 +524,22 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); } -bool kvm_vm_has_ept(struct kvm_vm *vm) +bool kvm_cpu_has_ept(void) { - struct kvm_vcpu *vcpu; uint64_t ctrl; - vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list); - TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n"); - - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) return false; - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; return ctrl & SECONDARY_EXEC_ENABLE_EPT; } void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { - TEST_REQUIRE(kvm_vm_has_ept(vm)); + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 9a6e4f3ad6b5..feaf2be20ff2 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -11,6 +11,7 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/atomic.h> +#include <linux/sizes.h> #include "kvm_util.h" #include "test_util.h" @@ -162,8 +163,7 @@ int main(int argc, char *argv[]) * just below the 4gb boundary. This test could create memory at * 1gb-3gb,but it's simpler to skip straight to 4gb. */ - const uint64_t size_1gb = (1 << 30); - const uint64_t start_gpa = (4ull * size_1gb); + const uint64_t start_gpa = SZ_4G; const int first_slot = 1; struct timespec time_start, time_run1, time_reset, time_run2; @@ -180,29 +180,26 @@ int main(int argc, char *argv[]) * are quite common for x86, requires changing only max_mem (KVM allows * 32k memslots, 32k * 2gb == ~64tb of guest memory). */ - slot_size = 2 * size_1gb; + slot_size = SZ_2G; max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); TEST_ASSERT(max_slots > first_slot, "KVM is broken"); /* All KVM MMUs should be able to survive a 128gb guest. */ - max_mem = 128 * size_1gb; + max_mem = 128ull * SZ_1G; calc_default_nr_vcpus(); while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { switch (opt) { case 'c': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0"); + nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'm': - max_mem = atoi(optarg) * size_1gb; - TEST_ASSERT(max_mem > 0, "memory size must be >0"); + max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G; break; case 's': - slot_size = atoi(optarg) * size_1gb; - TEST_ASSERT(slot_size > 0, "slot size must be >0"); + slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G; break; case 'H': hugepages = true; @@ -245,7 +242,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. */ - for (i = 0; i < slot_size; i += size_1gb) + for (i = 0; i < slot_size; i += SZ_1G) __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); #else for (i = 0; i < slot_size; i += vm->page_size) @@ -260,7 +257,7 @@ int main(int argc, char *argv[]) vcpus = NULL; pr_info("Running with %lugb of guest memory and %u vCPUs\n", - (gpa - start_gpa) / size_1gb, nr_vcpus); + (gpa - start_gpa) / SZ_1G, nr_vcpus); rendezvous_with_vcpus(&time_start, "spawning"); rendezvous_with_vcpus(&time_run1, "run 1"); diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index bb1d17a1171b..9855c41ca811 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -21,7 +21,7 @@ #include <linux/bitops.h> #include <linux/userfaultfd.h> -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "test_util.h" #include "guest_modes.h" @@ -34,9 +34,7 @@ static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; -static bool run_vcpus = true; - -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; struct kvm_run *run; @@ -45,7 +43,7 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) run = vcpu->run; /* Let the guest access its memory until a stop signal is received */ - while (READ_ONCE(run_vcpus)) { + while (!READ_ONCE(memstress_args.stop_vcpus)) { ret = _vcpu_run(vcpu); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); @@ -72,10 +70,10 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, int i; /* - * Add the dummy memslot just below the perf_test_util memslot, which is + * Add the dummy memslot just below the memstress memslot, which is * at the top of the guest physical address space. */ - gpa = perf_test_args.gpa - pages * vm->page_size; + gpa = memstress_args.gpa - pages * vm->page_size; for (i = 0; i < nr_modifications; i++) { usleep(delay); @@ -87,8 +85,8 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, } struct test_params { - useconds_t memslot_modification_delay; - uint64_t nr_memslot_modifications; + useconds_t delay; + uint64_t nr_iterations; bool partition_vcpu_memory_access; }; @@ -97,25 +95,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vm *vm; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); pr_info("Finished creating vCPUs\n"); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - add_remove_memslot(vm, p->memslot_modification_delay, - p->nr_memslot_modifications); - - run_vcpus = false; + add_remove_memslot(vm, p->delay, p->nr_iterations); - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); pr_info("All vCPU threads joined\n"); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) @@ -144,9 +139,8 @@ int main(int argc, char *argv[]) int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); int opt; struct test_params p = { - .memslot_modification_delay = 0, - .nr_memslot_modifications = - DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, + .delay = 0, + .nr_iterations = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, .partition_vcpu_memory_access = true }; @@ -158,16 +152,14 @@ int main(int argc, char *argv[]) guest_modes_cmdline(optarg); break; case 'd': - p.memslot_modification_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(p.memslot_modification_delay >= 0, - "A negative delay is not supported."); + p.delay = atoi_non_negative("Delay", optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; @@ -175,7 +167,7 @@ int main(int argc, char *argv[]) p.partition_vcpu_memory_access = false; break; case 'i': - p.nr_memslot_modifications = atoi(optarg); + p.nr_iterations = atoi_positive("Number of iterations", optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 44995446d942..e698306bf49d 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -20,20 +20,20 @@ #include <unistd.h> #include <linux/compiler.h> +#include <linux/sizes.h> #include <test_util.h> #include <kvm_util.h> #include <processor.h> -#define MEM_SIZE ((512U << 20) + 4096) -#define MEM_SIZE_PAGES (MEM_SIZE / 4096) -#define MEM_GPA 0x10000000UL +#define MEM_EXTRA_SIZE SZ_64K + +#define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE) +#define MEM_GPA SZ_256M #define MEM_AUX_GPA MEM_GPA #define MEM_SYNC_GPA MEM_AUX_GPA -#define MEM_TEST_GPA (MEM_AUX_GPA + 4096) -#define MEM_TEST_SIZE (MEM_SIZE - 4096) -static_assert(MEM_SIZE % 4096 == 0, "invalid mem size"); -static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size"); +#define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE) +#define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE) /* * 32 MiB is max size that gets well over 100 iterations on 509 slots. @@ -41,44 +41,38 @@ static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size"); * 8194 slots in use can then be tested (although with slightly * limited resolution). */ -#define MEM_SIZE_MAP ((32U << 20) + 4096) -#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096) -#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096) -#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096) -static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size"); +#define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE) +#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE) /* * 128 MiB is min size that fills 32k slots with at least one page in each * while at the same time gets 100+ iterations in such test + * + * 2 MiB chunk size like a typical huge page */ -#define MEM_TEST_UNMAP_SIZE (128U << 20) -#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096) -/* 2 MiB chunk size like a typical huge page */ -#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12)) -static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE, - "invalid unmap test region size"); -static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0, - "invalid unmap test region size"); -static_assert(MEM_TEST_UNMAP_SIZE_PAGES % - (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0, - "invalid unmap test region size"); +#define MEM_TEST_UNMAP_SIZE SZ_128M +#define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M /* * For the move active test the middle of the test area is placed on * a memslot boundary: half lies in the memslot being moved, half in * other memslot(s). * - * When running this test with 32k memslots (32764, really) each memslot - * contains 4 pages. - * The last one additionally contains the remaining 21 pages of memory, - * for the total size of 25 pages. - * Hence, the maximum size here is 50 pages. + * We have different number of memory slots, excluding the reserved + * memory slot 0, on various architectures and configurations. The + * memory size in this test is calculated by picking the maximal + * last memory slot's memory size, with alignment to the largest + * supported page size (64KB). In this way, the selected memory + * size for this test is compatible with test_memslot_move_prepare(). + * + * architecture slots memory-per-slot memory-on-last-slot + * -------------------------------------------------------------- + * x86-4KB 32763 16KB 160KB + * arm64-4KB 32766 16KB 112KB + * arm64-16KB 32766 16KB 112KB + * arm64-64KB 8192 64KB 128KB */ -#define MEM_TEST_MOVE_SIZE_PAGES (50) -#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096) +#define MEM_TEST_MOVE_SIZE (3 * SZ_64K) #define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE) static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE, "invalid move test region size"); @@ -100,6 +94,7 @@ struct vm_data { }; struct sync_area { + uint32_t guest_page_size; atomic_bool start_flag; atomic_bool exit_flag; atomic_bool sync_flag; @@ -192,14 +187,15 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) uint64_t gpage, pgoffs; uint32_t slot, slotoffs; void *base; + uint32_t guest_page_size = data->vm->page_size; TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate"); - TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096, + TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size, "Too high gpa to translate"); gpa -= MEM_GPA; - gpage = gpa / 4096; - pgoffs = gpa % 4096; + gpage = gpa / guest_page_size; + pgoffs = gpa % guest_page_size; slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); slotoffs = gpage - (slot * data->pages_per_slot); @@ -217,14 +213,16 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) } base = data->hva_slots[slot]; - return (uint8_t *)base + slotoffs * 4096 + pgoffs; + return (uint8_t *)base + slotoffs * guest_page_size + pgoffs; } static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot) { + uint32_t guest_page_size = data->vm->page_size; + TEST_ASSERT(slot < data->nslots, "Too high slot number"); - return MEM_GPA + slot * data->pages_per_slot * 4096; + return MEM_GPA + slot * data->pages_per_slot * guest_page_size; } static struct vm_data *alloc_vm(void) @@ -241,82 +239,111 @@ static struct vm_data *alloc_vm(void) return data; } +static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, + uint64_t pages_per_slot, uint64_t rempages) +{ + if (!pages_per_slot) + return false; + + if ((pages_per_slot * guest_page_size) % host_page_size) + return false; + + if ((rempages * guest_page_size) % host_page_size) + return false; + + return true; +} + + +static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) +{ + uint32_t guest_page_size = data->vm->page_size; + uint64_t mempages, pages_per_slot, rempages; + uint64_t slots; + + mempages = data->npages; + slots = data->nslots; + while (--slots > 1) { + pages_per_slot = mempages / slots; + rempages = mempages % pages_per_slot; + if (check_slot_pages(host_page_size, guest_page_size, + pages_per_slot, rempages)) + return slots + 1; /* slot 0 is reserved */ + } + + return 0; +} + static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, - void *guest_code, uint64_t mempages, + void *guest_code, uint64_t mem_size, struct timespec *slot_runtime) { - uint32_t max_mem_slots; - uint64_t rempages; + uint64_t mempages, rempages; uint64_t guest_addr; - uint32_t slot; + uint32_t slot, host_page_size, guest_page_size; struct timespec tstart; struct sync_area *sync; - max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); - TEST_ASSERT(max_mem_slots > 1, - "KVM_CAP_NR_MEMSLOTS should be greater than 1"); - TEST_ASSERT(nslots > 1 || nslots == -1, - "Slot count cap should be greater than 1"); - if (nslots != -1) - max_mem_slots = min(max_mem_slots, (uint32_t)nslots); - pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots); + host_page_size = getpagesize(); + guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + mempages = mem_size / guest_page_size; - TEST_ASSERT(mempages > 1, - "Can't test without any memory"); + data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); + TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size"); data->npages = mempages; - data->nslots = max_mem_slots - 1; - data->pages_per_slot = mempages / data->nslots; - if (!data->pages_per_slot) { - *maxslots = mempages + 1; + TEST_ASSERT(data->npages > 1, "Can't test without any memory"); + data->nslots = nslots; + data->pages_per_slot = data->npages / data->nslots; + rempages = data->npages % data->nslots; + if (!check_slot_pages(host_page_size, guest_page_size, + data->pages_per_slot, rempages)) { + *maxslots = get_max_slots(data, host_page_size); return false; } - rempages = mempages % data->nslots; data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); TEST_ASSERT(data->hva_slots, "malloc() fail"); data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); - ucall_init(data->vm, NULL); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", - max_mem_slots - 1, data->pages_per_slot, rempages); + data->nslots, data->pages_per_slot, rempages); clock_gettime(CLOCK_MONOTONIC, &tstart); - for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) { + for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { uint64_t npages; npages = data->pages_per_slot; - if (slot == max_mem_slots - 1) + if (slot == data->nslots) npages += rempages; vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, guest_addr, slot, npages, 0); - guest_addr += npages * 4096; + guest_addr += npages * guest_page_size; } *slot_runtime = timespec_elapsed(tstart); - for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) { + for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { uint64_t npages; uint64_t gpa; npages = data->pages_per_slot; - if (slot == max_mem_slots - 2) + if (slot == data->nslots) npages += rempages; - gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, - slot + 1); + gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); TEST_ASSERT(gpa == guest_addr, "vm_phy_pages_alloc() failed\n"); - data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr); - memset(data->hva_slots[slot], 0, npages * 4096); + data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); + memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); - guest_addr += npages * 4096; + guest_addr += npages * guest_page_size; } - virt_map(data->vm, MEM_GPA, MEM_GPA, mempages); + virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages); sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); atomic_init(&sync->start_flag, false); @@ -415,6 +442,7 @@ static bool guest_perform_sync(void) static void guest_code_test_memslot_move(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); GUEST_SYNC(0); @@ -425,7 +453,7 @@ static void guest_code_test_memslot_move(void) uintptr_t ptr; for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE; - ptr += 4096) + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; /* @@ -443,6 +471,7 @@ static void guest_code_test_memslot_move(void) static void guest_code_test_memslot_map(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); GUEST_SYNC(0); @@ -452,14 +481,16 @@ static void guest_code_test_memslot_map(void) uintptr_t ptr; for (ptr = MEM_TEST_GPA; - ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; - ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_2; if (!guest_perform_sync()) @@ -506,6 +537,9 @@ static void guest_code_test_memslot_unmap(void) static void guest_code_test_memslot_rw(void) { + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + GUEST_SYNC(0); guest_spin_until_start(); @@ -514,14 +548,14 @@ static void guest_code_test_memslot_rw(void) uintptr_t ptr; for (ptr = MEM_TEST_GPA; - ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; - for (ptr = MEM_TEST_GPA + 4096 / 2; - ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) { + for (ptr = MEM_TEST_GPA + page_size / 2; + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { uint64_t val = *(uint64_t *)ptr; GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val); @@ -539,6 +573,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, struct sync_area *sync, uint64_t *maxslots, bool isactive) { + uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; movesrcgpa = vm_slot2gpa(data, data->nslots - 1); @@ -547,7 +582,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint64_t lastpages; vm_gpa2hva(data, movesrcgpa, &lastpages); - if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) { + if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) { *maxslots = 0; return false; } @@ -593,8 +628,9 @@ static void test_memslot_do_unmap(struct vm_data *data, uint64_t offsp, uint64_t count) { uint64_t gpa, ctr; + uint32_t guest_page_size = data->vm->page_size; - for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) { + for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { uint64_t npages; void *hva; int ret; @@ -602,12 +638,12 @@ static void test_memslot_do_unmap(struct vm_data *data, hva = vm_gpa2hva(data, gpa, &npages); TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa); npages = min(npages, count - ctr); - ret = madvise(hva, npages * 4096, MADV_DONTNEED); + ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED); TEST_ASSERT(!ret, "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64, hva, gpa); ctr += npages; - gpa += npages * 4096; + gpa += npages * guest_page_size; } TEST_ASSERT(ctr == count, "madvise(MADV_DONTNEED) should exactly cover all of the requested area"); @@ -618,11 +654,12 @@ static void test_memslot_map_unmap_check(struct vm_data *data, { uint64_t gpa; uint64_t *val; + uint32_t guest_page_size = data->vm->page_size; if (!map_unmap_verify) return; - gpa = MEM_TEST_GPA + offsp * 4096; + gpa = MEM_TEST_GPA + offsp * guest_page_size; val = (typeof(val))vm_gpa2hva(data, gpa, NULL); TEST_ASSERT(*val == valexp, "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")", @@ -632,12 +669,14 @@ static void test_memslot_map_unmap_check(struct vm_data *data, static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) { + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; + /* * Unmap the second half of the test area while guest writes to (maps) * the first half. */ - test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2, - MEM_TEST_MAP_SIZE_PAGES / 2); + test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2); /* * Wait for the guest to finish writing the first half of the test @@ -648,10 +687,8 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) */ host_perform_sync(sync); test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); - test_memslot_map_unmap_check(data, - MEM_TEST_MAP_SIZE_PAGES / 2 - 1, - MEM_TEST_VAL_1); - test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2); + test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1); + test_memslot_do_unmap(data, 0, guest_pages / 2); /* @@ -664,16 +701,16 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) * the test area. */ host_perform_sync(sync); - test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2, - MEM_TEST_VAL_2); - test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1, - MEM_TEST_VAL_2); + test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); + test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2); } static void test_memslot_unmap_loop_common(struct vm_data *data, struct sync_area *sync, uint64_t chunk) { + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; uint64_t ctr; /* @@ -685,42 +722,49 @@ static void test_memslot_unmap_loop_common(struct vm_data *data, */ host_perform_sync(sync); test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); - for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk) + for (ctr = 0; ctr < guest_pages / 2; ctr += chunk) test_memslot_do_unmap(data, ctr, chunk); /* Likewise, but for the opposite host / guest areas */ host_perform_sync(sync); - test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2, - MEM_TEST_VAL_2); - for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2; - ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk) + test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); + for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk) test_memslot_do_unmap(data, ctr, chunk); } static void test_memslot_unmap_loop(struct vm_data *data, struct sync_area *sync) { - test_memslot_unmap_loop_common(data, sync, 1); + uint32_t host_page_size = getpagesize(); + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_chunk_pages = guest_page_size >= host_page_size ? + 1 : host_page_size / guest_page_size; + + test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_unmap_loop_chunked(struct vm_data *data, struct sync_area *sync) { - test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES); + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; + + test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) { uint64_t gptr; + uint32_t guest_page_size = data->vm->page_size; - for (gptr = MEM_TEST_GPA + 4096 / 2; - gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) + for (gptr = MEM_TEST_GPA + guest_page_size / 2; + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; host_perform_sync(sync); for (gptr = MEM_TEST_GPA; - gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) { + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) { uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); uint64_t val = *vptr; @@ -749,7 +793,7 @@ static bool test_execute(int nslots, uint64_t *maxslots, struct timespec *slot_runtime, struct timespec *guest_runtime) { - uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES; + uint64_t mem_size = tdata->mem_size ? : MEM_SIZE; struct vm_data *data; struct sync_area *sync; struct timespec tstart; @@ -764,6 +808,7 @@ static bool test_execute(int nslots, uint64_t *maxslots, sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); + sync->guest_page_size = data->vm->page_size; if (tdata->prepare && !tdata->prepare(data, sync, maxslots)) { ret = false; @@ -797,19 +842,19 @@ exit_free: static const struct test_data tests[] = { { .name = "map", - .mem_size = MEM_SIZE_MAP_PAGES, + .mem_size = MEM_SIZE_MAP, .guest_code = guest_code_test_memslot_map, .loop = test_memslot_map_loop, }, { .name = "unmap", - .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop, }, { .name = "unmap chunked", - .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop_chunked, }, @@ -867,9 +912,46 @@ static void help(char *name, struct test_args *targs) pr_info("%d: %s\n", ctr, tests[ctr].name); } +static bool check_memory_sizes(void) +{ + uint32_t host_page_size = getpagesize(); + uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + + if (host_page_size > SZ_64K || guest_page_size > SZ_64K) { + pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n", + host_page_size, guest_page_size); + return false; + } + + if (MEM_SIZE % guest_page_size || + MEM_TEST_SIZE % guest_page_size) { + pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n"); + return false; + } + + if (MEM_SIZE_MAP % guest_page_size || + MEM_TEST_MAP_SIZE % guest_page_size || + (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 || + (MEM_TEST_MAP_SIZE / guest_page_size) % 2) { + pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n"); + return false; + } + + if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE || + MEM_TEST_UNMAP_SIZE % guest_page_size || + (MEM_TEST_UNMAP_SIZE / guest_page_size) % + (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) { + pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n"); + return false; + } + + return true; +} + static bool parse_args(int argc, char *argv[], struct test_args *targs) { + uint32_t max_mem_slots; int opt; while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { @@ -885,40 +967,28 @@ static bool parse_args(int argc, char *argv[], map_unmap_verify = true; break; case 's': - targs->nslots = atoi(optarg); - if (targs->nslots <= 0 && targs->nslots != -1) { - pr_info("Slot count cap has to be positive or -1 for no cap\n"); + targs->nslots = atoi_paranoid(optarg); + if (targs->nslots <= 1 && targs->nslots != -1) { + pr_info("Slot count cap must be larger than 1 or -1 for no cap\n"); return false; } break; case 'f': - targs->tfirst = atoi(optarg); - if (targs->tfirst < 0) { - pr_info("First test to run has to be non-negative\n"); - return false; - } + targs->tfirst = atoi_non_negative("First test", optarg); break; case 'e': - targs->tlast = atoi(optarg); - if (targs->tlast < 0 || targs->tlast >= NTESTS) { + targs->tlast = atoi_non_negative("Last test", optarg); + if (targs->tlast >= NTESTS) { pr_info("Last test to run has to be non-negative and less than %zu\n", NTESTS); return false; } break; case 'l': - targs->seconds = atoi(optarg); - if (targs->seconds < 0) { - pr_info("Test length in seconds has to be non-negative\n"); - return false; - } + targs->seconds = atoi_non_negative("Test length", optarg); break; case 'r': - targs->runs = atoi(optarg); - if (targs->runs <= 0) { - pr_info("Runs per test has to be positive\n"); - return false; - } + targs->runs = atoi_positive("Runs per test", optarg); break; } } @@ -933,6 +1003,21 @@ static bool parse_args(int argc, char *argv[], return false; } + max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); + if (max_mem_slots <= 1) { + pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n"); + return false; + } + + /* Memory slot 0 is reserved */ + if (targs->nslots == -1) + targs->nslots = max_mem_slots - 1; + else + targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1; + + pr_info_v("Allowed Number of memory slots: %"PRIu32"\n", + targs->nslots + 1); + return true; } @@ -1007,8 +1092,8 @@ int main(int argc, char *argv[]) struct test_result rbestslottime; int tctr; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); + if (!check_memory_sizes()) + return -1; if (!parse_args(argc, argv, &targs)) return -1; diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 6f88da7e60be..3045fdf9bdf5 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -205,9 +205,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); @@ -224,7 +221,6 @@ int main(int argc, char *argv[]) * CPU affinity. */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9113696d5178..3fd81e58f40c 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -760,8 +760,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 19486084eb30..e41e2cb8ffa9 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -296,8 +296,6 @@ int main(int argc, char *argv[]) bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS); int idx; - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 3fdb6e2598eb..2ddde41c44ba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -231,9 +231,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 0d55f508d595..2ef1d1b72ce4 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -392,9 +392,6 @@ int main(int argc, char *argv[]) int i, loops; #endif - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - #ifdef __x86_64__ /* * FIXME: the zero-memslot test fails on aarch64 and s390x because @@ -407,7 +404,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ if (argc > 1) - loops = atoi(argv[1]); + loops = atoi_positive("Number of iterations", argv[1]); else loops = 10; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index db8967f1a17b..c87f38712073 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -266,7 +266,6 @@ int main(int ac, char **av) gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); - ucall_init(vm, NULL); TEST_REQUIRE(is_steal_time_supported(vcpus[0])); diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 1c274933912b..7f5b330b6a1b 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -121,7 +121,6 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_main); check_preconditions(vcpu); - ucall_init(vm, NULL); enter_guest(vcpu); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index dadcbad10a1d..bd72c6eb3b67 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -39,11 +39,6 @@ #define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) #define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) -#define TILE_CPUID 0x1d -#define XSTATE_CPUID 0xd -#define TILE_PALETTE_CPUID_SUBLEAVE 0x1 -#define XSTATE_USER_STATE_SUBLEAVE 0x0 - #define XSAVE_HDR_OFFSET 512 struct xsave_data { @@ -129,71 +124,26 @@ static bool check_xsave_supports_xtile(void) return __xgetbv(0) & XFEATURE_MASK_XTILE; } -static bool enum_xtile_config(void) +static void check_xtile_info(void) { - u32 eax, ebx, ecx, edx; - - __cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (!eax || !ebx || !ecx) - return false; - - xtile.max_names = ebx >> 16; - if (xtile.max_names < NUM_TILES) - return false; - - xtile.bytes_per_tile = eax >> 16; - if (xtile.bytes_per_tile < TILE_SIZE) - return false; - - xtile.bytes_per_row = ebx; - xtile.max_rows = ecx; - - return true; -} - -static bool enum_xsave_tile(void) -{ - u32 eax, ebx, ecx, edx; - - __cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx); - if (!eax || !ebx) - return false; - - xtile.xsave_offset = ebx; - xtile.xsave_size = eax; - - return true; -} - -static bool check_xsave_size(void) -{ - u32 eax, ebx, ecx, edx; - bool valid = false; - - __cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (ebx && ebx <= XSAVE_SIZE) - valid = true; - - return valid; -} - -static bool check_xtile_info(void) -{ - bool ret = false; - - if (!check_xsave_size()) - return ret; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); - if (!enum_xsave_tile()) - return ret; - - if (!enum_xtile_config()) - return ret; - - if (sizeof(struct tile_data) >= xtile.xsave_size) - ret = true; + xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET); + GUEST_ASSERT(xtile.xsave_offset == 2816); + xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE); + GUEST_ASSERT(xtile.xsave_size == 8192); + GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size); - return ret; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS)); + xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS); + GUEST_ASSERT(xtile.max_names == 8); + xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE); + GUEST_ASSERT(xtile.bytes_per_tile == 1024); + xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW); + GUEST_ASSERT(xtile.bytes_per_row == 64); + xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS); + GUEST_ASSERT(xtile.max_rows == 16); } static void set_tilecfg(struct tile_config *cfg) @@ -238,16 +188,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, { init_regs(); check_cpuid_xsave(); - GUEST_ASSERT(check_xsave_supports_xtile()); - GUEST_ASSERT(check_xtile_info()); - - /* check xtile configs */ - GUEST_ASSERT(xtile.xsave_offset == 2816); - GUEST_ASSERT(xtile.xsave_size == 8192); - GUEST_ASSERT(xtile.max_names == 8); - GUEST_ASSERT(xtile.bytes_per_tile == 1024); - GUEST_ASSERT(xtile.bytes_per_row == 64); - GUEST_ASSERT(xtile.max_rows == 16); + check_xsave_supports_xtile(); + check_xtile_info(); GUEST_SYNC(1); /* xfd=0, enable amx */ @@ -307,18 +249,24 @@ int main(int argc, char *argv[]) u32 amx_offset; int stage, ret; + /* + * Note, all off-by-default features must be enabled before anything + * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has(). + */ vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT); - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); - /* Get xsave/restore max size */ - xsave_restore_size = kvm_get_supported_cpuid_entry(0xd)->ecx; + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE), + "KVM should enumerate max XSAVE size when XSAVE is supported"); + xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE); run = vcpu->run; vcpu_regs_get(vcpu, ®s1); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index a6aeee2e62e4..2fc3ad9c887e 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -43,15 +43,6 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) } -static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid) -{ - u32 eax, ebx, ecx, edx; - - cpuid(0x40000000, &eax, &ebx, &ecx, &edx); - - GUEST_ASSERT(eax == 0x40000001); -} - static void guest_main(struct kvm_cpuid2 *guest_cpuid) { GUEST_SYNC(1); @@ -60,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_SYNC(2); - test_cpuid_40000000(guest_cpuid); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001); GUEST_DONE(); } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 4208487652f8..1027a671c7d3 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -57,9 +57,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c deleted file mode 100644 index 236e11755ba6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ /dev/null @@ -1,193 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020, Google LLC. - * - * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability. - */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - -#include "test_util.h" -#include "kvm_util.h" -#include "vmx.h" - -#define MAXPHYADDR 36 - -#define MEM_REGION_GVA 0x0000123456789000 -#define MEM_REGION_GPA 0x0000000700000000 -#define MEM_REGION_SLOT 10 -#define MEM_REGION_SIZE PAGE_SIZE - -static void guest_code(void) -{ - __asm__ __volatile__("flds (%[addr])" - :: [addr]"r"(MEM_REGION_GVA)); - - GUEST_DONE(); -} - -/* - * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2, - * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)". - */ -#define GET_RM(insn_byte) (insn_byte & 0x7) -#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3) -#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6) - -/* Ensure we are dealing with a simple 2-byte flds instruction. */ -static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size) -{ - return insn_size >= 2 && - insn_bytes[0] == 0xd9 && - GET_REG(insn_bytes[1]) == 0x0 && - GET_MOD(insn_bytes[1]) == 0x0 && - /* Ensure there is no SIB byte. */ - GET_RM(insn_bytes[1]) != 0x4 && - /* Ensure there is no displacement byte. */ - GET_RM(insn_bytes[1]) != 0x5; -} - -static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct kvm_regs regs; - uint8_t *insn_bytes; - uint8_t insn_size; - uint64_t flags; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Unexpected suberror: %u", - run->emulation_failure.suberror); - - if (run->emulation_failure.ndata >= 1) { - flags = run->emulation_failure.flags; - if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) && - run->emulation_failure.ndata >= 3) { - insn_size = run->emulation_failure.insn_size; - insn_bytes = run->emulation_failure.insn_bytes; - - TEST_ASSERT(insn_size <= 15 && insn_size > 0, - "Unexpected instruction size: %u", - insn_size); - - TEST_ASSERT(is_flds(insn_bytes, insn_size), - "Unexpected instruction. Expected 'flds' (0xd9 /0)"); - - /* - * If is_flds() succeeded then the instruction bytes - * contained an flds instruction that is 2-bytes in - * length (ie: no prefix, no SIB, no displacement). - */ - vcpu_regs_get(vcpu, ®s); - regs.rip += 2; - vcpu_regs_set(vcpu, ®s); - } - } -} - -static void do_guest_assert(struct ucall *uc) -{ - REPORT_GUEST_ASSERT(*uc); -} - -static void check_for_guest_assert(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (vcpu->run->exit_reason == KVM_EXIT_IO && - get_ucall(vcpu, &uc) == UCALL_ABORT) { - do_guest_assert(&uc); - } -} - -static void process_ucall_done(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - check_for_guest_assert(vcpu); - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, - "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", - uc.cmd, UCALL_DONE); -} - -static uint64_t process_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - break; - case UCALL_ABORT: - do_guest_assert(&uc); - break; - case UCALL_DONE: - process_ucall_done(vcpu); - break; - default: - TEST_ASSERT(false, "Unexpected ucall"); - } - - return uc.cmd; -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t gpa, pte; - uint64_t *hva; - int rc; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); - - rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); - TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); - vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - MEM_REGION_GPA, MEM_REGION_SLOT, - MEM_REGION_SIZE / PAGE_SIZE, 0); - gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, - MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); - virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); - hva = addr_gpa2hva(vm, MEM_REGION_GPA); - memset(hva, 0, PAGE_SIZE); - pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA); - vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << 36)); - - vcpu_run(vcpu); - process_exit_on_emulation_error(vcpu); - vcpu_run(vcpu); - - TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE"); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c new file mode 100644 index 000000000000..37c61f712fd5 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + * + * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "flds_emulation.h" + +#include "test_util.h" + +#define MMIO_GPA 0x700000000 +#define MMIO_GVA MMIO_GPA + +static void guest_code(void) +{ + /* Execute flds with an MMIO address to force KVM to emulate it. */ + flds(MMIO_GVA); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + virt_map(vm, MMIO_GVA, MMIO_GPA, 1); + + vcpu_run(vcpu); + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h new file mode 100644 index 000000000000..e43a7df25f2c --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_FLDS_EMULATION_H +#define SELFTEST_KVM_FLDS_EMULATION_H + +#include "kvm_util.h" + +#define FLDS_MEM_EAX ".byte 0xd9, 0x00" + +/* + * flds is an instruction that the KVM instruction emulator is known not to + * support. This can be used in guest code along with a mechanism to force + * KVM to emulate the instruction (e.g. by providing an MMIO address) to + * exercise emulation failures. + */ +static inline void flds(uint64_t address) +{ + __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); +} + +static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + uint8_t *insn_bytes; + uint64_t flags; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Unexpected suberror: %u", + run->emulation_failure.suberror); + + flags = run->emulation_failure.flags; + TEST_ASSERT(run->emulation_failure.ndata >= 3 && + flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, + "run->emulation_failure is missing instruction bytes"); + + TEST_ASSERT(run->emulation_failure.insn_size >= 2, + "Expected a 2-byte opcode for 'flds', got %d bytes", + run->emulation_failure.insn_size); + + insn_bytes = run->emulation_failure.insn_bytes; + TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + insn_bytes[0], insn_bytes[1]); + + vcpu_regs_get(vcpu, ®s); + regs.rip += 2; + vcpu_regs_set(vcpu, ®s); +} + +#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index e804eb08dff9..5c27efbf405e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -134,9 +134,6 @@ int main(int argc, char *argv[]) const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index 99bc202243d2..af29e5776d40 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -16,6 +16,7 @@ #include "kvm_util.h" +#include "hyperv.h" #include "vmx.h" static int ud_count; @@ -30,24 +31,19 @@ static void guest_nmi_handler(struct ex_regs *regs) { } -/* Exits to L1 destroy GRPs! */ -static inline void rdmsr_fs_base(void) +static inline void rdmsr_from_l2(uint32_t msr) { - __asm__ __volatile__ ("mov $0xc0000100, %%rcx; rdmsr" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); -} -static inline void rdmsr_gs_base(void) -{ - __asm__ __volatile__ ("mov $0xc0000101, %%rcx; rdmsr" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); + /* Currently, L1 doesn't preserve GPRs during vmexits. */ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); } +/* Exit to L1 from L2 with RDMSR instruction */ void l2_guest_code(void) { + u64 unused; + GUEST_SYNC(7); GUEST_SYNC(8); @@ -58,42 +54,58 @@ void l2_guest_code(void) vmcall(); /* MSR-Bitmap tests */ - rdmsr_fs_base(); /* intercepted */ - rdmsr_fs_base(); /* intercepted */ - rdmsr_gs_base(); /* not intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ vmcall(); - rdmsr_gs_base(); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ + + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. + */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS, + &unused); /* Done, exit to L1 and never come back. */ vmcall(); } -void guest_code(struct vmx_pages *vmx_pages) +void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, + vm_vaddr_t hv_hcall_page_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa); + x2apic_enable(); GUEST_SYNC(1); GUEST_SYNC(2); - enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); + evmcs_enable(); - GUEST_ASSERT(vmx_pages->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_SYNC(3); - GUEST_ASSERT(load_vmcs(vmx_pages)); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(load_evmcs(hv_pages)); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); GUEST_SYNC(4); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); GUEST_SYNC(5); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); current_evmcs->revision_id = -1u; GUEST_ASSERT(vmlaunch()); current_evmcs->revision_id = EVMCS_VERSION; @@ -102,8 +114,18 @@ void guest_code(struct vmx_pages *vmx_pages) vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | PIN_BASED_NMI_EXITING); + /* L2 TLB flush setup */ + current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa; + current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + current_evmcs->hv_vm_id = 1; + current_evmcs->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); + GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); /* * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is @@ -120,7 +142,7 @@ void guest_code(struct vmx_pages *vmx_pages) /* Intercept RDMSR 0xc0000100 */ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); - set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); + __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); current_evmcs->guest_rip += 2; /* rdmsr */ @@ -132,7 +154,7 @@ void guest_code(struct vmx_pages *vmx_pages) current_evmcs->guest_rip += 2; /* rdmsr */ /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); + __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; GUEST_ASSERT(!vmresume()); @@ -146,12 +168,24 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); current_evmcs->guest_rip += 2; /* rdmsr */ + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. + */ + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH); + GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_SYNC(11); /* Try enlightened vmptrld with an incorrect GPA */ - evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); + evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(ud_count == 1); GUEST_DONE(); @@ -198,7 +232,8 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0; + vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -212,11 +247,16 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + vcpu_set_hv_cpuid(vcpu); vcpu_enable_evmcs(vcpu); vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 05b32e550a80..3163c3e8db0a 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -13,25 +13,6 @@ #include "processor.h" #include "hyperv.h" -#define LINUX_OS_ID ((u64)0x8100 << 48) - -static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address, uint64_t *hv_status) -{ - uint8_t vector; - - /* Note both the hypercall and the "asm safe" clobber r9-r11. */ - asm volatile("mov %[output_address], %%r8\n\t" - KVM_ASM_SAFE("vmcall") - : "=a" (*hv_status), - "+c" (control), "+d" (input_address), - KVM_ASM_SAFE_OUTPUTS(vector) - : [output_address] "r"(output_address), - "a" (-EFAULT) - : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); - return vector; -} - struct msr_data { uint32_t idx; bool available; @@ -71,7 +52,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) GUEST_ASSERT(hcall->control); - wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { @@ -81,7 +62,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) input = output = 0; } - vector = hypercall(hcall->control, input, output, &res); + vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); } else { @@ -169,7 +150,7 @@ static void guest_test_msrs_access(void) */ msr->idx = HV_X64_MSR_GUEST_OS_ID; msr->write = 1; - msr->write_val = LINUX_OS_ID; + msr->write_val = HYPERV_LINUX_OS_ID; msr->available = 1; break; case 3: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c new file mode 100644 index 000000000000..8b791eac7d5a --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. + * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <pthread.h> +#include <inttypes.h> + +#include "kvm_util.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define RECEIVER_VCPU_ID_1 2 +#define RECEIVER_VCPU_ID_2 65 + +#define IPI_VECTOR 0xfe + +static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[2]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +/* HvCallSendSyntheticClusterIpiEx hypercall */ +struct hv_send_ipi_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + +static inline void hv_init(vm_vaddr_t pgs_gpa) +{ + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); +} + +static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + u32 vcpu_id; + + x2apic_enable(); + hv_init(pgs_gpa); + + vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + /* Signal sender vCPU we're ready */ + ipis_rcvd[vcpu_id] = (u64)-1; + + for (;;) + asm volatile("sti; hlt; cli"); +} + +static void guest_ipi_handler(struct ex_regs *regs) +{ + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + ipis_rcvd[vcpu_id]++; + wrmsr(HV_X64_MSR_EOI, 1); +} + +static inline void nop_loop(void) +{ + int i; + + for (i = 0; i < 100000000; i++) + asm volatile("nop"); +} + +static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; + struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; + int stage = 1, ipis_expected[2] = {0}; + + hv_init(pgs_gpa); + GUEST_SYNC(stage++); + + /* Wait for receiver vCPUs to come up */ + while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2]) + nop_loop(); + ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0; + + /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + ipi->vector = IPI_VECTOR; + ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 0; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* + * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL. + * Nothing to write anything to XMM regs. + */ + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, HV_GENERIC_SET_ALL); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + int old, r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + + TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id); + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + unsigned int exit_reason; + vm_vaddr_t hcall_page; + pthread_t threads[2]; + int stage = 1, r; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + vm_init_descriptor_tables(vm); + + vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); + vcpu_init_descriptor_tables(vcpu[1]); + vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); + vcpu_init_descriptor_tables(vcpu[2]); + vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); + + vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_hv_cpuid(vcpu[0]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", r); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", errno); + + while (true) { + vcpu_run(vcpu[0]); + + exit_reason = vcpu[0]->run->exit_reason; + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return r; +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index a380ad7bb9b3..68a7d354ea07 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -23,59 +23,78 @@ #define L2_GUEST_STACK_SIZE 256 -struct hv_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; - -/* - * Hyper-V uses the software reserved clean bit in VMCB - */ -#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31) +/* Exit to L1 from L2 with RDMSR instruction */ +static inline void rdmsr_from_l2(uint32_t msr) +{ + /* Currently, L1 doesn't preserve GPRs during vmexits. */ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} void l2_guest_code(void) { + u64 unused; + GUEST_SYNC(3); /* Exit to L1 */ vmmcall(); /* MSR-Bitmap tests */ - rdmsr(MSR_FS_BASE); /* intercepted */ - rdmsr(MSR_FS_BASE); /* intercepted */ - rdmsr(MSR_GS_BASE); /* not intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ vmmcall(); - rdmsr(MSR_GS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ GUEST_SYNC(5); + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. + */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS, &unused); + /* Done, exit to L1 and never come back. */ vmmcall(); } -static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) +static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, + struct hyperv_test_pages *hv_pages, + vm_vaddr_t pgs_gpa) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; GUEST_SYNC(1); - wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); GUEST_ASSERT(svm->vmcb_gpa); /* Prepare for L2 execution. */ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + /* L2 TLB flush setup */ + hve->partition_assist_page = hv_pages->partition_assist_gpa; + hve->hv_enlightenments_control.nested_flush_hypercall = 1; + hve->hv_vm_id = 1; + hve->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + GUEST_SYNC(2); run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); @@ -84,7 +103,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) /* Intercept RDMSR 0xc0000100 */ vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT; - set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); + __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); vmcb->save.rip += 2; /* rdmsr */ @@ -96,20 +115,34 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) vmcb->save.rip += 2; /* rdmsr */ /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); + __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ - vmcb->control.clean |= VMCB_HV_NESTED_ENLIGHTENMENTS; + vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS; run_guest(vmcb, svm->vmcb_gpa); /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); vmcb->save.rip += 3; /* vmcall */ /* Now tell KVM we've changed MSR-Bitmap */ - vmcb->control.clean &= ~VMCB_HV_NESTED_ENLIGHTENMENTS; + vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS; run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); vmcb->save.rip += 2; /* rdmsr */ + + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. + */ + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL); + GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH); + run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); GUEST_SYNC(6); @@ -119,8 +152,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0; - + vm_vaddr_t nested_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; @@ -134,7 +167,13 @@ int main(int argc, char *argv[]) vcpu_set_hv_cpuid(vcpu); run = vcpu->run; vcpu_alloc_svm(vm, &nested_gva); - vcpu_args_set(vcpu, 1, nested_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + + vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); for (stage = 1;; stage++) { vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c new file mode 100644 index 000000000000..68f97ff720a7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. + * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <asm/barrier.h> +#include <pthread.h> +#include <inttypes.h> + +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define WORKER_VCPU_ID_1 2 +#define WORKER_VCPU_ID_2 65 + +#define NTRY 100 +#define NTEST_PAGES 2 + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ +struct hv_tlb_flush { + u64 address_space; + u64 flags; + u64 processor_mask; + u64 gva_list[]; +} __packed; + +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ +struct hv_tlb_flush_ex { + u64 address_space; + u64 flags; + struct hv_vpset hv_vp_set; + u64 gva_list[]; +} __packed; + +/* + * Pass the following info to 'workers' and 'sender' + * - Hypercall page's GVA + * - Hypercall page's GPA + * - Test pages GVA + * - GVAs of the test pages' PTEs + */ +struct test_data { + vm_vaddr_t hcall_gva; + vm_paddr_t hcall_gpa; + vm_vaddr_t test_pages; + vm_vaddr_t test_pages_pte[NTEST_PAGES]; +}; + +/* 'Worker' vCPU code checking the contents of the test page */ +static void worker_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct test_data *)test_data; + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES; + u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64)); + u64 expected, val; + + x2apic_enable(); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + + for (;;) { + cpu_relax(); + + expected = READ_ONCE(*this_cpu); + + /* + * Make sure the value in the test page is read after reading + * the expectation for the first time. Pairs with wmb() in + * prepare_to_test(). + */ + rmb(); + + val = READ_ONCE(*(u64 *)data->test_pages); + + /* + * Make sure the value in the test page is read after before + * reading the expectation for the second time. Pairs with wmb() + * post_test(). + */ + rmb(); + + /* + * '0' indicates the sender is between iterations, wait until + * the sender is ready for this vCPU to start checking again. + */ + if (!expected) + continue; + + /* + * Re-read the per-vCPU byte to ensure the sender didn't move + * onto a new iteration. + */ + if (expected != READ_ONCE(*this_cpu)) + continue; + + GUEST_ASSERT(val == expected); + } +} + +/* + * Write per-CPU info indicating what each 'worker' CPU is supposed to see in + * test page. '0' means don't check. + */ +static void set_expected_val(void *addr, u64 val, int vcpu_id) +{ + void *exp_page = addr + PAGE_SIZE * NTEST_PAGES; + + *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val; +} + +/* + * Update PTEs swapping two test pages. + * TODO: use swap()/xchg() when these are provided. + */ +static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2) +{ + uint64_t tmp = *(uint64_t *)pte_gva1; + + *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2; + *(uint64_t *)pte_gva2 = tmp; +} + +/* + * TODO: replace the silly NOP loop with a proper udelay() implementation. + */ +static inline void do_delay(void) +{ + int i; + + for (i = 0; i < 1000000; i++) + asm volatile("nop"); +} + +/* + * Prepare to test: 'disable' workers by setting the expectation to '0', + * clear hypercall input page and then swap two test pages. + */ +static inline void prepare_to_test(struct test_data *data) +{ + /* Clear hypercall input page */ + memset((void *)data->hcall_gva, 0, PAGE_SIZE); + + /* 'Disable' workers */ + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2); + + /* Make sure workers are 'disabled' before we swap PTEs. */ + wmb(); + + /* Make sure workers have enough time to notice */ + do_delay(); + + /* Swap test page mappings */ + swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]); +} + +/* + * Finalize the test: check hypercall resule set the expected val for + * 'worker' CPUs and give them some time to test. + */ +static inline void post_test(struct test_data *data, u64 exp1, u64 exp2) +{ + /* Make sure we change the expectation after swapping PTEs */ + wmb(); + + /* Set the expectation for workers, '0' means don't test */ + set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2); + + /* Make sure workers have enough time to test */ + do_delay(); +} + +#define TESTVAL1 0x0101010101010101 +#define TESTVAL2 0x0202020202020202 + +/* Main vCPU doing the test */ +static void sender_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct test_data *)test_data; + struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; + struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva; + vm_paddr_t hcall_gpa = data->hcall_gpa; + int i, stage = 1; + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa); + + /* "Slow" hypercalls */ + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->processor_mask = 0; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + /* "Fast" hypercalls */ + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : + TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT, + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + struct ucall uc; + int old; + int r; + unsigned int exit_reason; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + exit_reason = vcpu->run->exit_reason; + + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO", + vcpu->id, exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id); + } + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + unsigned int exit_reason; + pthread_t threads[2]; + vm_vaddr_t test_data_page, gva; + vm_paddr_t gpa; + uint64_t *pte; + struct test_data *data; + struct ucall uc; + int stage = 1, r, i; + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Test data page */ + test_data_page = vm_vaddr_alloc_page(vm); + data = (struct test_data *)addr_gva2hva(vm, test_data_page); + + /* Hypercall input/output */ + data->hcall_gva = vm_vaddr_alloc_pages(vm, 2); + data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva); + memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE); + + /* + * Test pages: the first one is filled with '0x01's, the second with '0x02's + * and the test will swap their mappings. The third page keeps the indication + * about the current state of mappings. + */ + data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1); + for (i = 0; i < NTEST_PAGES; i++) + memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i), + (u8)(i + 1), PAGE_SIZE); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2); + + /* + * Get PTE pointers for test pages and map them inside the guest. + * Use separate page for each PTE for simplicity. + */ + gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); + for (i = 0; i < NTEST_PAGES; i++) { + pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); + gpa = addr_hva2gpa(vm, pte); + __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); + } + + /* + * Sender vCPU which performs the test: swaps test pages, sets expectation + * for 'workers' and issues TLB flush hypercalls. + */ + vcpu_args_set(vcpu[0], 1, test_data_page); + vcpu_set_hv_cpuid(vcpu[0]); + + /* Create worker vCPUs which check the contents of the test pages */ + vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code); + vcpu_args_set(vcpu[1], 1, test_data_page); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code); + vcpu_args_set(vcpu[2], 1, test_data_page); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create() failed"); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create() failed"); + + while (true) { + vcpu_run(vcpu[0]); + exit_reason = vcpu[0]->run->exit_reason; + + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 59ffe7fd354f..ea0978f22db8 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,10 +241,10 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi(optarg); + reclaim_period_ms = atoi_non_negative("Reclaim period", optarg); break; case 't': - token = atoi(optarg); + token = atoi_paranoid(optarg); break; case 'r': reboot_permissions = true; @@ -257,7 +257,6 @@ int main(int argc, char **argv) } TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); - TEST_REQUIRE(reclaim_period_ms > 0); __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 76417c7d687b..310a104d94f0 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,9 +72,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; uint64_t msr_platform_info; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index ea4e259a1e2e..2de98fce7edd 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -21,29 +21,6 @@ #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union cpuid10_ebx { - struct { - unsigned int no_unhalted_core_cycles:1; - unsigned int no_instructions_retired:1; - unsigned int no_unhalted_reference_cycles:1; - unsigned int no_llc_reference:1; - unsigned int no_llc_misses:1; - unsigned int no_branch_instruction_retired:1; - unsigned int no_branch_misses_retired:1; - } split; - unsigned int full; -}; - /* End of stuff taken from perf_event.h. */ /* Oddly, this isn't in perf_event.h. */ @@ -380,46 +357,31 @@ static void test_pmu_config_disable(void (*guest_code)(void)) } /* - * Check for a non-zero PMU version, at least one general-purpose - * counter per logical processor, an EBX bit vector of length greater - * than 5, and EBX[5] clear. - */ -static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry) -{ - union cpuid10_eax eax = { .full = entry->eax }; - union cpuid10_ebx ebx = { .full = entry->ebx }; - - return eax.split.version_id && eax.split.num_counters > 0 && - eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && - !ebx.split.no_branch_instruction_retired; -} - -/* - * Note that CPUID leaf 0xa is Intel-specific. This leaf should be - * clear on AMD hardware. + * On Intel, check for a non-zero PMU version, at least one general-purpose + * counter per logical processor, and support for counting the number of branch + * instructions retired. */ static bool use_intel_pmu(void) { - const struct kvm_cpuid_entry2 *entry; - - entry = kvm_get_supported_cpuid_entry(0xa); - return is_intel_cpu() && check_intel_pmu_leaf(entry); + return is_intel_cpu() && + kvm_cpu_property(X86_PROPERTY_PMU_VERSION) && + kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) && + kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t eax) +static bool is_zen1(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; + return family == 0x17 && model <= 0x0f; } -static bool is_zen2(uint32_t eax) +static bool is_zen2(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && - x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; + return family == 0x17 && model >= 0x30 && model <= 0x3f; } -static bool is_zen3(uint32_t eax) +static bool is_zen3(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; + return family == 0x19 && model <= 0x0f; } /* @@ -432,13 +394,13 @@ static bool is_zen3(uint32_t eax) */ static bool use_amd_pmu(void) { - const struct kvm_cpuid_entry2 *entry; + uint32_t family = kvm_cpu_family(); + uint32_t model = kvm_cpu_model(); - entry = kvm_get_supported_cpuid_entry(1); return is_amd_cpu() && - (is_zen1(entry->eax) || - is_zen2(entry->eax) || - is_zen3(entry->eax)); + (is_zen1(family, model) || + is_zen2(family, model) || + is_zen3(family, model)); } int main(int argc, char *argv[]) @@ -447,9 +409,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); TEST_REQUIRE(use_intel_pmu() || use_amd_pmu()); diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 2bb08bf2125d..a284fcef6ed7 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -82,9 +82,6 @@ int main(int argc, char *argv[]) uint64_t cr4; int rc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - /* * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and * use it to verify all supported CR4 bits can be set prior to defining diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c new file mode 100644 index 000000000000..06edf00a97d6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Test that KVM emulates instructions in response to EPT violations when + * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "flds_emulation.h" + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define MAXPHYADDR 36 + +#define MEM_REGION_GVA 0x0000123456789000 +#define MEM_REGION_GPA 0x0000000700000000 +#define MEM_REGION_SLOT 10 +#define MEM_REGION_SIZE PAGE_SIZE + +static void guest_code(bool tdp_enabled) +{ + uint64_t error_code; + uint64_t vector; + + vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); + + /* + * When TDP is enabled, flds will trigger an emulation failure, exit to + * userspace, and then the selftest host "VMM" skips the instruction. + * + * When TDP is disabled, no instruction emulation is required so flds + * should generate #PF(RSVD). + */ + if (tdp_enabled) { + GUEST_ASSERT(!vector); + } else { + GUEST_ASSERT_EQ(vector, PF_VECTOR); + GUEST_ASSERT(error_code & PFERR_RSVD_MASK); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + uint64_t *pte; + uint64_t *hva; + uint64_t gpa; + int rc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); + + rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); + TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + MEM_REGION_GPA, MEM_REGION_SLOT, + MEM_REGION_SIZE / PAGE_SIZE, 0); + gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, + MEM_REGION_GPA, MEM_REGION_SLOT); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); + memset(hva, 0, PAGE_SIZE); + + pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); + *pte |= BIT_ULL(MAXPHYADDR); + + vcpu_run(vcpu); + + /* + * When TDP is enabled, KVM must emulate in response the guest physical + * address that is illegal from the guest's perspective, but is legal + * from hardware's perspeective. This should result in an emulation + * failure exit to userspace since KVM doesn't support emulating flds. + */ + if (kvm_is_tdp_enabled()) { + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 1f136a81858e..cb38a478e1f6 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -137,6 +137,8 @@ int main(int argc, char *argv[]) struct kvm_x86_state *state; int stage, stage_reported; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c new file mode 100644 index 000000000000..e73fcdef47bb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_nested_shutdown_test + * + * Copyright (C) 2022, Red Hat, Inc. + * + * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("ud2"); +} + +static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + idt[6].p = 0; // #UD is intercepted but its injection will cause #NP + idt[11].p = 0; // #NP is not intercepted and will cause another + // #NP that will be converted to #DF + idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here */ + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_alloc_svm(vm, &svm_gva); + + vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index e637d7736012..e497ace629c1 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -194,9 +194,6 @@ done: int main(int argc, char *argv[]) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS), diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 9b6db0b0b13e..d2f9b5bdfab2 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -90,9 +90,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu_events events; int rv, cap; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c index 70b44f0b52fe..ead5d878a71c 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -3,6 +3,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -20,10 +21,11 @@ static void l2_guest_code(void) : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); } -void l1_guest_code(struct vmx_pages *vmx) -{ #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; +unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + +void l1_guest_code_vmx(struct vmx_pages *vmx) +{ GUEST_ASSERT(vmx->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx)); @@ -38,24 +40,53 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_DONE(); } +void l1_guest_code_svm(struct svm_test_data *svm) +{ + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* don't intercept shutdown to test the case of SVM allowing to do so */ + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here, L1 should crash */ + GUEST_ASSERT(0); +} + int main(void) { struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vcpu_events events; - vm_vaddr_t vmx_pages_gva; struct ucall uc; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); + bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); + + TEST_REQUIRE(has_vmx || has_svm); TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); + if (has_vmx) { + vm_vaddr_t vmx_pages_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + } else { + vm_vaddr_t svm_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + } + + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); run = vcpu->run; - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -78,13 +109,21 @@ int main(void) "No triple fault pending"); vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } + if (has_svm) { + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + } else { + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } + return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 7316521428f8..91076c9787b4 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -56,9 +56,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index a4f06370a245..25fa55344a10 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -733,16 +733,98 @@ static void test_msr_permission_bitmap(void) kvm_vm_free(vm); } -int main(int argc, char *argv[]) +#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \ +({ \ + int r = __vm_ioctl(vm, cmd, arg); \ + \ + if (flag & valid_mask) \ + TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \ + else \ + TEST_ASSERT(r == -1 && errno == EINVAL, \ + "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \ + #cmd, flag, r, errno, strerror(errno)); \ +}) + +static void run_user_space_msr_flag_test(struct kvm_vm *vm) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); + struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR }; + int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + + for (i = 0; i < nflags; i++) { + cap.args[0] = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap, + BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK); + } +} + +static void run_msr_filter_flag_test(struct kvm_vm *vm) +{ + u64 deny_bits = 0; + struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .nmsrs = 1, + .base = 0, + .bitmap = (uint8_t *)&deny_bits, + }, + }, + }; + int nflags; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + nflags = sizeof(filter.flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK); + } + filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW; + nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.ranges[0].flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK); + } +} + +/* Test that attempts to write to the unused bits in a flag fails. */ +static void test_user_exit_msr_flags(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */ + run_user_space_msr_flag_test(vm); + + /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */ + run_msr_filter_flag_test(vm); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ test_msr_filter_allow(); test_msr_filter_deny(); test_msr_permission_bitmap(); + test_user_exit_msr_flags(); + return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 2d8c23d639f7..f0456fb031b1 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -78,6 +78,7 @@ int main(int argc, char *argv[]) bool done = false; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c index 322d561b4260..90720b6205f4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c @@ -67,6 +67,52 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull); } +static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, + uint64_t msr_bit, + struct kvm_x86_cpu_feature feature) +{ + uint64_t val; + + vcpu_clear_cpuid_feature(vcpu, feature); + + val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val); + + if (!kvm_cpu_has(feature)) + return; + + vcpu_set_cpuid_feature(vcpu, feature); +} + +static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu) +{ + uint64_t supported_bits = FEAT_CTL_LOCKED | + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | + FEAT_CTL_SGX_LC_ENABLED | + FEAT_CTL_SGX_ENABLED | + FEAT_CTL_LMCE_ENABLED; + int bit, r; + + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE); + + for_each_clear_bit(bit, &supported_bits, 64) { + r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit)); + TEST_ASSERT(r == 0, + "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit); + } +} + int main(void) { struct kvm_vcpu *vcpu; @@ -79,6 +125,7 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, NULL); vmx_save_restore_msrs_test(vcpu); + ia32_feature_control_msr_test(vcpu); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 069589c52f41..c280ba1e6572 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -20,16 +20,6 @@ #define PMU_CAP_FW_WRITES (1ULL << 13) #define PMU_CAP_LBR_FMT 0x3f -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - union perf_capabilities { struct { u64 lbr_format:6; @@ -53,11 +43,9 @@ static void guest_code(void) int main(int argc, char *argv[]) { - const struct kvm_cpuid_entry2 *entry_a_0; struct kvm_vm *vm; struct kvm_vcpu *vcpu; int ret; - union cpuid10_eax eax; union perf_capabilities host_cap; uint64_t val; @@ -69,11 +57,8 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); - TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa); - entry_a_0 = kvm_get_supported_cpuid_entry(0xa); - - eax.full = entry_a_0->eax; - __TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU"); + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); /* testcase 1, set capabilities when we have PDCM bit */ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 6f7a5ef66718..d7d37dae3eeb 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -114,7 +114,9 @@ static void test_icr(struct xapic_vcpu *x) * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; - for (i = vcpu->id + 1; i < 0xff; i++) { + for (i = 0; i < 0xff; i++) { + if (i == vcpu->id) + continue; for (j = 0; j < 8; j++) __test_icr(x, i << (32 + 24) | icr | (j << 8)); } diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 2a5727188c8d..721f6a693799 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -26,17 +26,17 @@ #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 -#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE)) +#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) #define DUMMY_REGION_SLOT 11 #define SHINFO_ADDR (SHINFO_REGION_GPA) -#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) -#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) +#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) +#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) #define SHINFO_VADDR (SHINFO_REGION_GVA) -#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) +#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) #define EVTCHN_VECTOR 0x10 @@ -88,14 +88,20 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); struct vcpu_runstate_info { - uint32_t state; - uint64_t state_entry_time; - uint64_t time[4]; + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; /* Extra field for overrun check */ }; +struct compat_vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; +} __attribute__((__packed__));; + struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ }; struct vcpu_info { @@ -440,6 +446,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); @@ -449,8 +456,8 @@ int main(int argc, char *argv[]) /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); - virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); + SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); @@ -475,6 +482,19 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + if (do_runstate_flag) { + struct kvm_xen_hvm_attr ruf = { + .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, + .u.runstate_update_flag = 1, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); + + ruf.u.runstate_update_flag = 0; + vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); + TEST_ASSERT(ruf.u.runstate_update_flag == 1, + "Failed to read back RUNSTATE_UPDATE_FLAG attr"); + } + struct kvm_xen_hvm_attr ha = { .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, @@ -999,22 +1019,91 @@ int main(int argc, char *argv[]) runstate_names[i], rs->time[i]); } } - TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); - TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, - "State entry time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, - "Running time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, - "Runnable time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, - "Blocked time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, - "Offline time mismatch"); - - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); + + /* + * Exercise runstate info at all points across the page boundary, in + * 32-bit and 64-bit mode. In particular, test the case where it is + * configured in 32-bit mode and then switched to 64-bit mode while + * active, which takes it onto the second page. + */ + unsigned long runstate_addr; + struct compat_vcpu_runstate_info *crs; + for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; + runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { + + rs = addr_gpa2hva(vm, runstate_addr); + crs = (void *)rs; + + memset(rs, 0xa5, sizeof(*rs)); + + /* Set to compatibility mode */ + lm.u.long_mode = 0; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + /* Set runstate to new address (kernel will write it) */ + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = runstate_addr, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); + + if (verbose) + printf("Compatibility runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + TEST_ASSERT(crs->state_entry_time == crs->time[0] + + crs->time[1] + crs->time[2] + crs->time[3], + "runstate times don't add up"); + + + /* Now switch to 64-bit mode */ + lm.u.long_mode = 1; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + memset(rs, 0xa5, sizeof(*rs)); + + /* Don't change the address, just trigger a write */ + struct kvm_xen_vcpu_attr adj = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, + .u.runstate.state = (uint64_t)-1 + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); + + if (verbose) + printf("64-bit runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + } } + kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index da9290817866..792c3f0a59b4 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -75,7 +75,7 @@ TEST(abi_version) const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; - ASSERT_EQ(2, landlock_create_ruleset(NULL, 0, + ASSERT_EQ(3, landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, @@ -263,23 +263,6 @@ TEST(ruleset_fd_transfer) .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR, }; int ruleset_fd_tx, dir_fd; - union { - /* Aligned ancillary data buffer. */ - char buf[CMSG_SPACE(sizeof(ruleset_fd_tx))]; - struct cmsghdr _align; - } cmsg_tx = {}; - char data_tx = '.'; - struct iovec io = { - .iov_base = &data_tx, - .iov_len = sizeof(data_tx), - }; - struct msghdr msg = { - .msg_iov = &io, - .msg_iovlen = 1, - .msg_control = &cmsg_tx.buf, - .msg_controllen = sizeof(cmsg_tx.buf), - }; - struct cmsghdr *cmsg; int socket_fds[2]; pid_t child; int status; @@ -298,33 +281,20 @@ TEST(ruleset_fd_transfer) &path_beneath_attr, 0)); ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); - cmsg = CMSG_FIRSTHDR(&msg); - ASSERT_NE(NULL, cmsg); - cmsg->cmsg_len = CMSG_LEN(sizeof(ruleset_fd_tx)); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx)); - /* Sends the ruleset FD over a socketpair and then close it. */ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds)); - ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0)); + ASSERT_EQ(0, send_fd(socket_fds[0], ruleset_fd_tx)); ASSERT_EQ(0, close(socket_fds[0])); ASSERT_EQ(0, close(ruleset_fd_tx)); child = fork(); ASSERT_LE(0, child); if (child == 0) { - int ruleset_fd_rx; + const int ruleset_fd_rx = recv_fd(socket_fds[1]); - *(char *)msg.msg_iov->iov_base = '\0'; - ASSERT_EQ(sizeof(data_tx), - recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC)); - ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base); + ASSERT_LE(0, ruleset_fd_rx); ASSERT_EQ(0, close(socket_fds[1])); - cmsg = CMSG_FIRSTHDR(&msg); - ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(ruleset_fd_tx))); - memcpy(&ruleset_fd_rx, CMSG_DATA(cmsg), sizeof(ruleset_fd_tx)); /* Enforces the received ruleset on the child. */ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 7ba18eb23783..d7987ae8d7fc 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -10,6 +10,7 @@ #include <errno.h> #include <linux/landlock.h> #include <sys/capability.h> +#include <sys/socket.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> @@ -17,6 +18,10 @@ #include "../kselftest_harness.h" +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + /* * TEST_F_FORK() is useful when a test drop privileges but the corresponding * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory @@ -140,14 +145,12 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) } /* We cannot put such helpers in a library because of kselftest_harness.h . */ -__attribute__((__unused__)) static void -disable_caps(struct __test_metadata *const _metadata) +static void __maybe_unused disable_caps(struct __test_metadata *const _metadata) { _init_caps(_metadata, false); } -__attribute__((__unused__)) static void -drop_caps(struct __test_metadata *const _metadata) +static void __maybe_unused drop_caps(struct __test_metadata *const _metadata) { _init_caps(_metadata, true); } @@ -176,14 +179,80 @@ static void _effective_cap(struct __test_metadata *const _metadata, } } -__attribute__((__unused__)) static void -set_cap(struct __test_metadata *const _metadata, const cap_value_t caps) +static void __maybe_unused set_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) { _effective_cap(_metadata, caps, CAP_SET); } -__attribute__((__unused__)) static void -clear_cap(struct __test_metadata *const _metadata, const cap_value_t caps) +static void __maybe_unused clear_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) { _effective_cap(_metadata, caps, CAP_CLEAR); } + +/* Receives an FD from a UNIX socket. Returns the received FD, or -errno. */ +static int __maybe_unused recv_fd(int usock) +{ + int fd_rx; + union { + /* Aligned ancillary data buffer. */ + char buf[CMSG_SPACE(sizeof(fd_rx))]; + struct cmsghdr _align; + } cmsg_rx = {}; + char data = '\0'; + struct iovec io = { + .iov_base = &data, + .iov_len = sizeof(data), + }; + struct msghdr msg = { + .msg_iov = &io, + .msg_iovlen = 1, + .msg_control = &cmsg_rx.buf, + .msg_controllen = sizeof(cmsg_rx.buf), + }; + struct cmsghdr *cmsg; + int res; + + res = recvmsg(usock, &msg, MSG_CMSG_CLOEXEC); + if (res < 0) + return -errno; + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg->cmsg_len != CMSG_LEN(sizeof(fd_rx))) + return -EIO; + + memcpy(&fd_rx, CMSG_DATA(cmsg), sizeof(fd_rx)); + return fd_rx; +} + +/* Sends an FD on a UNIX socket. Returns 0 on success or -errno. */ +static int __maybe_unused send_fd(int usock, int fd_tx) +{ + union { + /* Aligned ancillary data buffer. */ + char buf[CMSG_SPACE(sizeof(fd_tx))]; + struct cmsghdr _align; + } cmsg_tx = {}; + char data_tx = '.'; + struct iovec io = { + .iov_base = &data_tx, + .iov_len = sizeof(data_tx), + }; + struct msghdr msg = { + .msg_iov = &io, + .msg_iovlen = 1, + .msg_control = &cmsg_tx.buf, + .msg_controllen = sizeof(cmsg_tx.buf), + }; + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); + + cmsg->cmsg_len = CMSG_LEN(sizeof(fd_tx)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &fd_tx, sizeof(fd_tx)); + + if (sendmsg(usock, &msg, 0) < 0) + return -errno; + return 0; +} diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 45de42a027c5..d5dab986f612 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -58,6 +58,7 @@ static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1"; static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2"; static const char dir_s3d1[] = TMP_DIR "/s3d1"; +static const char file1_s3d1[] = TMP_DIR "/s3d1/f1"; /* dir_s3d2 is a mount point. */ static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2"; static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; @@ -83,6 +84,7 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; * │ ├── f1 * │ └── f2 * └── s3d1 + * ├── f1 * └── s3d2 * └── s3d3 */ @@ -208,6 +210,7 @@ static void create_layout1(struct __test_metadata *const _metadata) create_file(_metadata, file1_s2d3); create_file(_metadata, file2_s2d3); + create_file(_metadata, file1_s3d1); create_directory(_metadata, dir_s3d2); set_cap(_metadata, CAP_SYS_ADMIN); ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700")); @@ -230,6 +233,7 @@ static void remove_layout1(struct __test_metadata *const _metadata) EXPECT_EQ(0, remove_path(file1_s2d2)); EXPECT_EQ(0, remove_path(file1_s2d1)); + EXPECT_EQ(0, remove_path(file1_s3d1)); EXPECT_EQ(0, remove_path(dir_s3d3)); set_cap(_metadata, CAP_SYS_ADMIN); umount(dir_s3d2); @@ -406,9 +410,10 @@ TEST_F_FORK(layout1, inval) #define ACCESS_FILE ( \ LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ - LANDLOCK_ACCESS_FS_READ_FILE) + LANDLOCK_ACCESS_FS_READ_FILE | \ + LANDLOCK_ACCESS_FS_TRUNCATE) -#define ACCESS_LAST LANDLOCK_ACCESS_FS_REFER +#define ACCESS_LAST LANDLOCK_ACCESS_FS_TRUNCATE #define ACCESS_ALL ( \ ACCESS_FILE | \ @@ -422,7 +427,7 @@ TEST_F_FORK(layout1, inval) LANDLOCK_ACCESS_FS_MAKE_FIFO | \ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \ LANDLOCK_ACCESS_FS_MAKE_SYM | \ - ACCESS_LAST) + LANDLOCK_ACCESS_FS_REFER) /* clang-format on */ @@ -3157,6 +3162,463 @@ TEST_F_FORK(layout1, proc_pipe) ASSERT_EQ(0, close(pipe_fds[1])); } +/* Invokes truncate(2) and returns its errno or 0. */ +static int test_truncate(const char *const path) +{ + if (truncate(path, 10) < 0) + return errno; + return 0; +} + +/* + * Invokes creat(2) and returns its errno or 0. + * Closes the opened file descriptor on success. + */ +static int test_creat(const char *const path) +{ + int fd = creat(path, 0600); + + if (fd < 0) + return errno; + + /* + * Mixing error codes from close(2) and creat(2) should not lead to any + * (access type) confusion for this test. + */ + if (close(fd) < 0) + return errno; + return 0; +} + +/* + * Exercises file truncation when it's not restricted, + * as it was the case before LANDLOCK_ACCESS_FS_TRUNCATE existed. + */ +TEST_F_FORK(layout1, truncate_unhandled) +{ + const char *const file_r = file1_s1d1; + const char *const file_w = file2_s1d1; + const char *const file_none = file1_s1d2; + const struct rule rules[] = { + { + .path = file_r, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = file_w, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + /* Implicitly: No rights for file_none. */ + {}, + }; + + const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE; + int ruleset_fd; + + /* Enable Landlock. */ + ruleset_fd = create_ruleset(_metadata, handled, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Checks read right: truncate and open with O_TRUNC work, unless the + * file is attempted to be opened for writing. + */ + EXPECT_EQ(0, test_truncate(file_r)); + EXPECT_EQ(0, test_open(file_r, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_r, O_WRONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_creat(file_r)); + + /* + * Checks write right: truncate and open with O_TRUNC work, unless the + * file is attempted to be opened for reading. + */ + EXPECT_EQ(0, test_truncate(file_w)); + EXPECT_EQ(EACCES, test_open(file_w, O_RDONLY | O_TRUNC)); + EXPECT_EQ(0, test_open(file_w, O_WRONLY | O_TRUNC)); + EXPECT_EQ(0, test_creat(file_w)); + + /* + * Checks "no rights" case: truncate works but all open attempts fail, + * including creat. + */ + EXPECT_EQ(0, test_truncate(file_none)); + EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_creat(file_none)); +} + +TEST_F_FORK(layout1, truncate) +{ + const char *const file_rwt = file1_s1d1; + const char *const file_rw = file2_s1d1; + const char *const file_rt = file1_s1d2; + const char *const file_t = file2_s1d2; + const char *const file_none = file1_s1d3; + const char *const dir_t = dir_s2d1; + const char *const file_in_dir_t = file1_s2d1; + const char *const dir_w = dir_s3d1; + const char *const file_in_dir_w = file1_s3d1; + const struct rule rules[] = { + { + .path = file_rwt, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE, + }, + { + .path = file_rw, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = file_rt, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE, + }, + { + .path = file_t, + .access = LANDLOCK_ACCESS_FS_TRUNCATE, + }, + /* Implicitly: No access rights for file_none. */ + { + .path = dir_t, + .access = LANDLOCK_ACCESS_FS_TRUNCATE, + }, + { + .path = dir_w, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE; + int ruleset_fd; + + /* Enable Landlock. */ + ruleset_fd = create_ruleset(_metadata, handled, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks read, write and truncate rights: truncation works. */ + EXPECT_EQ(0, test_truncate(file_rwt)); + EXPECT_EQ(0, test_open(file_rwt, O_RDONLY | O_TRUNC)); + EXPECT_EQ(0, test_open(file_rwt, O_WRONLY | O_TRUNC)); + + /* Checks read and write rights: no truncate variant works. */ + EXPECT_EQ(EACCES, test_truncate(file_rw)); + EXPECT_EQ(EACCES, test_open(file_rw, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_rw, O_WRONLY | O_TRUNC)); + + /* + * Checks read and truncate rights: truncation works. + * + * Note: Files can get truncated using open() even with O_RDONLY. + */ + EXPECT_EQ(0, test_truncate(file_rt)); + EXPECT_EQ(0, test_open(file_rt, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_rt, O_WRONLY | O_TRUNC)); + + /* Checks truncate right: truncate works, but can't open file. */ + EXPECT_EQ(0, test_truncate(file_t)); + EXPECT_EQ(EACCES, test_open(file_t, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_t, O_WRONLY | O_TRUNC)); + + /* Checks "no rights" case: No form of truncation works. */ + EXPECT_EQ(EACCES, test_truncate(file_none)); + EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC)); + + /* + * Checks truncate right on directory: truncate works on contained + * files. + */ + EXPECT_EQ(0, test_truncate(file_in_dir_t)); + EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_WRONLY | O_TRUNC)); + + /* + * Checks creat in dir_w: This requires the truncate right when + * overwriting an existing file, but does not require it when the file + * is new. + */ + EXPECT_EQ(EACCES, test_creat(file_in_dir_w)); + + ASSERT_EQ(0, unlink(file_in_dir_w)); + EXPECT_EQ(0, test_creat(file_in_dir_w)); +} + +/* Invokes ftruncate(2) and returns its errno or 0. */ +static int test_ftruncate(int fd) +{ + if (ftruncate(fd, 10) < 0) + return errno; + return 0; +} + +TEST_F_FORK(layout1, ftruncate) +{ + /* + * This test opens a new file descriptor at different stages of + * Landlock restriction: + * + * without restriction: ftruncate works + * something else but truncate restricted: ftruncate works + * truncate restricted and permitted: ftruncate works + * truncate restricted and not permitted: ftruncate fails + * + * Whether this works or not is expected to depend on the time when the + * FD was opened, not to depend on the time when ftruncate() was + * called. + */ + const char *const path = file1_s1d1; + const __u64 handled1 = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE; + const struct rule layer1[] = { + { + .path = path, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + const __u64 handled2 = LANDLOCK_ACCESS_FS_TRUNCATE; + const struct rule layer2[] = { + { + .path = path, + .access = LANDLOCK_ACCESS_FS_TRUNCATE, + }, + {}, + }; + const __u64 handled3 = LANDLOCK_ACCESS_FS_TRUNCATE | + LANDLOCK_ACCESS_FS_WRITE_FILE; + const struct rule layer3[] = { + { + .path = path, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + int fd_layer0, fd_layer1, fd_layer2, fd_layer3, ruleset_fd; + + fd_layer0 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + + ruleset_fd = create_ruleset(_metadata, handled1, layer1); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd_layer1 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + EXPECT_EQ(0, test_ftruncate(fd_layer1)); + + ruleset_fd = create_ruleset(_metadata, handled2, layer2); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd_layer2 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + EXPECT_EQ(0, test_ftruncate(fd_layer1)); + EXPECT_EQ(0, test_ftruncate(fd_layer2)); + + ruleset_fd = create_ruleset(_metadata, handled3, layer3); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd_layer3 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + EXPECT_EQ(0, test_ftruncate(fd_layer1)); + EXPECT_EQ(0, test_ftruncate(fd_layer2)); + EXPECT_EQ(EACCES, test_ftruncate(fd_layer3)); + + ASSERT_EQ(0, close(fd_layer0)); + ASSERT_EQ(0, close(fd_layer1)); + ASSERT_EQ(0, close(fd_layer2)); + ASSERT_EQ(0, close(fd_layer3)); +} + +/* clang-format off */ +FIXTURE(ftruncate) {}; +/* clang-format on */ + +FIXTURE_SETUP(ftruncate) +{ + prepare_layout(_metadata); + create_file(_metadata, file1_s1d1); +} + +FIXTURE_TEARDOWN(ftruncate) +{ + EXPECT_EQ(0, remove_path(file1_s1d1)); + cleanup_layout(_metadata); +} + +FIXTURE_VARIANT(ftruncate) +{ + const __u64 handled; + const __u64 permitted; + const int expected_open_result; + const int expected_ftruncate_result; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, w_w) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE, + .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .expected_open_result = 0, + .expected_ftruncate_result = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, t_t) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .expected_open_result = 0, + .expected_ftruncate_result = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, wt_w) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .expected_open_result = 0, + .expected_ftruncate_result = EACCES, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, wt_wt) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE, + .expected_open_result = 0, + .expected_ftruncate_result = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, wt_t) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .expected_open_result = EACCES, +}; + +TEST_F_FORK(ftruncate, open_and_ftruncate) +{ + const char *const path = file1_s1d1; + const struct rule rules[] = { + { + .path = path, + .access = variant->permitted, + }, + {}, + }; + int fd, ruleset_fd; + + /* Enable Landlock. */ + ruleset_fd = create_ruleset(_metadata, variant->handled, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd = open(path, O_WRONLY); + EXPECT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0)); + if (fd >= 0) { + EXPECT_EQ(variant->expected_ftruncate_result, + test_ftruncate(fd)); + ASSERT_EQ(0, close(fd)); + } +} + +TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes) +{ + int child, fd, status; + int socket_fds[2]; + + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, + socket_fds)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + /* + * Enables Landlock in the child process, open a file descriptor + * where truncation is forbidden and send it to the + * non-landlocked parent process. + */ + const char *const path = file1_s1d1; + const struct rule rules[] = { + { + .path = path, + .access = variant->permitted, + }, + {}, + }; + int fd, ruleset_fd; + + ruleset_fd = create_ruleset(_metadata, variant->handled, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd = open(path, O_WRONLY); + ASSERT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0)); + + if (fd >= 0) { + ASSERT_EQ(0, send_fd(socket_fds[0], fd)); + ASSERT_EQ(0, close(fd)); + } + + ASSERT_EQ(0, close(socket_fds[0])); + + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + if (variant->expected_open_result == 0) { + fd = recv_fd(socket_fds[1]); + ASSERT_LE(0, fd); + + EXPECT_EQ(variant->expected_ftruncate_result, + test_ftruncate(fd)); + ASSERT_EQ(0, close(fd)); + } + + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + ASSERT_EQ(0, close(socket_fds[0])); + ASSERT_EQ(0, close(socket_fds[1])); +} + +TEST(memfd_ftruncate) +{ + int fd; + + fd = memfd_create("name", MFD_CLOEXEC); + ASSERT_LE(0, fd); + + /* + * Checks that ftruncate is permitted on file descriptors that are + * created in ways other than open(2). + */ + EXPECT_EQ(0, test_ftruncate(fd)); + + ASSERT_EQ(0, close(fd)); +} + /* clang-format off */ FIXTURE(layout1_bind) {}; /* clang-format on */ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a3ea3d4a206d..291144c284fb 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -123,6 +123,11 @@ endef clean: $(CLEAN) +# Enables to extend CFLAGS and LDFLAGS from command line, e.g. +# make USERCFLAGS=-Werror USERLDFLAGS=-static +CFLAGS += $(USERCFLAGS) +LDFLAGS += $(USERLDFLAGS) + # When make O= with kselftest target from main level # the following aren't defined. # diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index 162c41e9bcae..1562aa7d60b0 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -888,6 +888,17 @@ TEST_F(NCI, deinit) &msg); ASSERT_EQ(rc, 0); EXPECT_EQ(get_dev_enable_state(&msg), 0); + + /* Test that operations that normally send packets to the driver + * don't cause issues when the device is already closed. + * Note: the send of NFC_CMD_DEV_UP itself still succeeds it's just + * that the device won't actually be up. + */ + close(self->virtual_nci_fd); + self->virtual_nci_fd = -1; + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_UP, self->dev_idex); + EXPECT_EQ(rc, 0); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 3d7adee7a3e6..9cc84114741d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash +csum cmsg_sender +diag_uid fin_ack_lat gro hwtstamp_config @@ -25,6 +27,7 @@ rxtimestamp sk_bind_sendto_listen sk_connect_zero_addr socket +so_incoming_cpu so_netns_cookie so_txtime stress_reuseport_listen diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 69c58362c0ed..3007e98a6d64 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -71,6 +71,10 @@ TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_PROGS += test_ingress_egress_chaining.sh +TEST_GEN_PROGS += so_incoming_cpu +TEST_PROGS += sctp_vrf.sh +TEST_GEN_FILES += sctp_hello +TEST_GEN_FILES += csum TEST_FILES := settings diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 969620ae9928..1e4b397cece6 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,3 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob unix_connect +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c new file mode 100644 index 000000000000..5b88f7129fea --- /dev/null +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include <sched.h> + +#include <unistd.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/sock_diag.h> +#include <linux/unix_diag.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(diag_uid) +{ + int netlink_fd; + int unix_fd; + __u32 inode; + __u64 cookie; +}; + +FIXTURE_VARIANT(diag_uid) +{ + int unshare; + int udiag_show; +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid) +{ + .unshare = 0, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid_unshare) +{ + .unshare = CLONE_NEWUSER, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_SETUP(diag_uid) +{ + struct stat file_stat; + socklen_t optlen; + int ret; + + if (variant->unshare) + ASSERT_EQ(unshare(variant->unshare), 0); + + self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + ASSERT_NE(self->netlink_fd, -1); + + self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_NE(self->unix_fd, -1); + + ret = fstat(self->unix_fd, &file_stat); + ASSERT_EQ(ret, 0); + + self->inode = file_stat.st_ino; + + optlen = sizeof(self->cookie); + ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(diag_uid) +{ + close(self->netlink_fd); + close(self->unix_fd); +} + +int send_request(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self, + const FIXTURE_VARIANT(diag_uid) *variant) +{ + struct { + struct nlmsghdr nlh; + struct unix_diag_req udr; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .udr = { + .sdiag_family = AF_UNIX, + .udiag_ino = self->inode, + .udiag_cookie = { + (__u32)self->cookie, + (__u32)(self->cookie >> 32) + }, + .udiag_show = variant->udiag_show + } + }; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + return sendmsg(self->netlink_fd, &msg, 0); +} + +void render_response(struct __test_metadata *_metadata, + struct unix_diag_req *udr, __u32 len) +{ + unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr)); + struct rtattr *attr; + uid_t uid; + + ASSERT_GT(len, sizeof(*udr)); + ASSERT_EQ(udr->sdiag_family, AF_UNIX); + + attr = (struct rtattr *)(udr + 1); + ASSERT_NE(RTA_OK(attr, rta_len), 0); + ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID); + + uid = *(uid_t *)RTA_DATA(attr); + ASSERT_EQ(uid, getuid()); +} + +void receive_response(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self) +{ + long buf[8192 / sizeof(long)]; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + struct unix_diag_req *udr; + struct nlmsghdr *nlh; + int ret; + + ret = recvmsg(self->netlink_fd, &msg, 0); + ASSERT_GT(ret, 0); + + nlh = (struct nlmsghdr *)buf; + ASSERT_NE(NLMSG_OK(nlh, ret), 0); + ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY); + + render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len); + + nlh = NLMSG_NEXT(nlh, ret); + ASSERT_EQ(NLMSG_OK(nlh, ret), 0); +} + +TEST_F(diag_uid, 1) +{ + int ret; + + ret = send_request(_metadata, self, variant); + ASSERT_GT(ret, 0); + + receive_response(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile index 8ccaf8732eb2..4abaf16d2077 100644 --- a/tools/testing/selftests/net/bpf/Makefile +++ b/tools/testing/selftests/net/bpf/Makefile @@ -1,14 +1,51 @@ # SPDX-License-Identifier: GPL-2.0 CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(abspath ../../../lib/bpf) +APIDIR := $(abspath ../../../include/uapi) + CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../../lib CCINCLUDE += -I../../../../../usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf $(OUTPUT)/bpf +$(MAKE_DIRS): + mkdir -p $@ TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o all: $(TEST_CUSTOM_PROGS) -$(OUTPUT)/%.o: %.c - $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@ +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + +$(TEST_CUSTOM_PROGS): $(OUTPUT)/%.o: %.c $(BPFOBJ) | $(MAKE_DIRS) + $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index ead7963b9bf0..bd89198cd817 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,5 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4=y +CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c new file mode 100644 index 000000000000..82a1c1839da6 --- /dev/null +++ b/tools/testing/selftests/net/csum.c @@ -0,0 +1,986 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP. + * + * The test runs on two machines to exercise the NIC. For this reason it + * is not integrated in kselftests. + * + * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS)) + * + * Rx: + * + * The sender sends packets with a known checksum field using PF_INET(6) + * SOCK_RAW sockets. + * + * good packet: $CMD [-t] + * bad packet: $CMD [-t] -E + * + * The receiver reads UDP packets with a UDP socket. This is not an + * option for TCP packets ('-t'). Optionally insert an iptables filter + * to avoid these entering the real protocol stack. + * + * The receiver also reads all packets with a PF_PACKET socket, to + * observe whether both good and bad packets arrive on the host. And to + * read the optional TP_STATUS_CSUM_VALID bit. This requires setting + * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY. + * + * Tx: + * + * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx + * checksum offload. + * + * The sender can sends packets with a UDP socket. + * + * Optionally crafts a packet that sums up to zero to verify that the + * device writes negative zero 0xFFFF in this case to distinguish from + * 0x0000 (checksum disabled), as required by RFC 768. Hit this case + * by choosing a specific source port. + * + * good packet: $CMD -U + * zero csum: $CMD -U -Z + * + * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR, + * to cover more protocols. PF_PACKET requires passing src and dst mac + * addresses. + * + * good packet: $CMD -s $smac -d $dmac -p [-t] + * + * Argument '-z' sends UDP packets with a 0x000 checksum disabled field, + * to verify that the NIC passes these packets unmodified. + * + * Argument '-e' adds a transport mode encapsulation header between + * network and transport header. This will fail for devices that parse + * headers. Should work on devices that implement protocol agnostic tx + * checksum offload (NETIF_F_HW_CSUM). + * + * Argument '-r $SEED' optionally randomizes header, payload and length + * to increase coverage between packets sent. SEED 1 further chooses a + * different seed for each run (and logs this for reproducibility). It + * is advised to enable this for extra coverage in continuous testing. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/virtio_net.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +static bool cfg_bad_csum; +static int cfg_family = PF_INET6; +static int cfg_num_pkt = 4; +static bool cfg_do_rx = true; +static bool cfg_do_tx = true; +static bool cfg_encap; +static char *cfg_ifname = "eth0"; +static char *cfg_mac_dst; +static char *cfg_mac_src; +static int cfg_proto = IPPROTO_UDP; +static int cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static uint16_t cfg_port_dst = 34000; +static uint16_t cfg_port_src = 33000; +static uint16_t cfg_port_src_encap = 33001; +static unsigned int cfg_random_seed; +static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */ +static bool cfg_send_pfpacket; +static bool cfg_send_udp; +static int cfg_timeout_ms = 2000; +static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */ +static bool cfg_zero_sum; /* create packet that adds up to zero */ + +static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6}; +static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6}; + +#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr)) +#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr)) +#define MAX_PAYLOAD_LEN 1024 + +/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */ +struct udp_encap_hdr { + uint8_t nexthdr; + uint8_t padding[3]; +}; + +/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */ +static void *iph_addr_p; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL); +} + +static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum) +{ + uint16_t *words = (uint16_t *)data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + + if (len & 1) + sum += ((unsigned char *)data)[len - 1]; + + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t checksum(void *th, uint16_t proto, size_t len) +{ + uint32_t sum; + int alen; + + alen = cfg_family == PF_INET6 ? 32 : 8; + + sum = checksum_nofold(iph_addr_p, alen, 0); + sum += htons(proto); + sum += htons(len); + + /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */ + if (cfg_do_tx && cfg_send_pfpacket) + return ~checksum_fold(NULL, 0, sum); + else + return checksum_fold(th, len, sum); +} + +static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len) +{ + struct iphdr *iph = _iph; + + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = proto; + iph->saddr = cfg_saddr4.sin_addr.s_addr; + iph->daddr = cfg_daddr4.sin_addr.s_addr; + iph->tot_len = htons(sizeof(*iph) + len); + iph->check = checksum_fold(iph, sizeof(*iph), 0); + + iph_addr_p = &iph->saddr; + + return iph + 1; +} + +static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len) +{ + struct ipv6hdr *ip6h = _ip6h; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 64; + ip6h->saddr = cfg_saddr6.sin6_addr; + ip6h->daddr = cfg_daddr6.sin6_addr; + + iph_addr_p = &ip6h->saddr; + + return ip6h + 1; +} + +static void *build_packet_udp(void *_uh) +{ + struct udphdr *uh = _uh; + + uh->source = htons(cfg_port_src); + uh->dest = htons(cfg_port_dst); + uh->len = htons(sizeof(*uh) + cfg_payload_len); + uh->check = 0; + + /* choose source port so that uh->check adds up to zero */ + if (cfg_zero_sum) { + uh->source = 0; + uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + fprintf(stderr, "tx: changing sport: %hu -> %hu\n", + cfg_port_src, ntohs(uh->source)); + cfg_port_src = ntohs(uh->source); + } + + if (cfg_zero_disable) + uh->check = 0; + else + uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + if (cfg_bad_csum) + uh->check = ~uh->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check); + return uh + 1; +} + +static void *build_packet_tcp(void *_th) +{ + struct tcphdr *th = _th; + + th->source = htons(cfg_port_src); + th->dest = htons(cfg_port_dst); + th->doff = 5; + th->check = 0; + + th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len); + + if (cfg_bad_csum) + th->check = ~th->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check); + return th + 1; +} + +static char *build_packet_udp_encap(void *_uh) +{ + struct udphdr *uh = _uh; + struct udp_encap_hdr *eh = _uh + sizeof(*uh); + + /* outer dst == inner dst, to simplify BPF filter + * outer src != inner src, to demultiplex on recv + */ + uh->dest = htons(cfg_port_dst); + uh->source = htons(cfg_port_src_encap); + uh->check = 0; + uh->len = htons(sizeof(*uh) + + sizeof(*eh) + + sizeof(struct tcphdr) + + cfg_payload_len); + + eh->nexthdr = IPPROTO_TCP; + + return build_packet_tcp(eh + 1); +} + +static char *build_packet(char *buf, int max_len, int *len) +{ + uint8_t proto; + char *off; + int tlen; + + if (cfg_random_seed) { + int *buf32 = (void *)buf; + int i; + + for (i = 0; i < (max_len / sizeof(int)); i++) + buf32[i] = rand(); + } else { + memset(buf, cfg_payload_char, max_len); + } + + if (cfg_proto == IPPROTO_UDP) + tlen = sizeof(struct udphdr) + cfg_payload_len; + else + tlen = sizeof(struct tcphdr) + cfg_payload_len; + + if (cfg_encap) { + proto = IPPROTO_UDP; + tlen += ENC_HEADER_LEN; + } else { + proto = cfg_proto; + } + + if (cfg_family == PF_INET) + off = build_packet_ipv4(buf, proto, tlen); + else + off = build_packet_ipv6(buf, proto, tlen); + + if (cfg_encap) + off = build_packet_udp_encap(off); + else if (cfg_proto == IPPROTO_UDP) + off = build_packet_udp(off); + else + off = build_packet_tcp(off); + + /* only pass the payload, but still compute headers for cfg_zero_sum */ + if (cfg_send_udp) { + *len = cfg_payload_len; + return off; + } + + *len = off - buf + cfg_payload_len; + return buf; +} + +static int open_inet(int ipproto, int protocol) +{ + int fd; + + fd = socket(cfg_family, ipproto, protocol); + if (fd == -1) + error(1, errno, "socket inet"); + + if (cfg_family == PF_INET6) { + /* may have been updated by cfg_zero_sum */ + cfg_saddr6.sin6_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6))) + error(1, errno, "bind dgram 6"); + if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "connect dgram 6"); + } else { + /* may have been updated by cfg_zero_sum */ + cfg_saddr4.sin_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4))) + error(1, errno, "bind dgram 4"); + if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "connect dgram 4"); + } + + return fd; +} + +static int open_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket packet"); + + if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt packet_vnet_ndr"); + + return fd; +} + +static void send_inet(int fd, const char *buf, int len) +{ + int ret; + + ret = write(fd, buf, len); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %d", ret); +} + +static void eth_str_to_addr(const char *str, unsigned char *eth) +{ + if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + ð[0], ð[1], ð[2], ð[3], ð[4], ð[5]) != 6) + error(1, 0, "cannot parse mac addr %s", str); +} + +static void send_packet(int fd, const char *buf, int len) +{ + struct virtio_net_hdr vh = {0}; + struct sockaddr_ll addr = {0}; + struct msghdr msg = {0}; + struct ethhdr eth; + struct iovec iov[3]; + int ret; + + addr.sll_family = AF_PACKET; + addr.sll_halen = ETH_ALEN; + addr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!addr.sll_ifindex) + error(1, errno, "if_nametoindex %s", cfg_ifname); + + vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (cfg_family == PF_INET6) { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + addr.sll_protocol = htons(ETH_P_IPV6); + } else { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr); + addr.sll_protocol = htons(ETH_P_IP); + } + + if (cfg_encap) + vh.csum_start += ENC_HEADER_LEN; + + if (cfg_proto == IPPROTO_TCP) { + vh.csum_offset = __builtin_offsetof(struct tcphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct tcphdr); + } else { + vh.csum_offset = __builtin_offsetof(struct udphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct udphdr); + } + + eth_str_to_addr(cfg_mac_src, eth.h_source); + eth_str_to_addr(cfg_mac_dst, eth.h_dest); + eth.h_proto = addr.sll_protocol; + + iov[0].iov_base = &vh; + iov[0].iov_len = sizeof(vh); + + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + + iov[2].iov_base = (void *)buf; + iov[2].iov_len = len; + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + ret = sendmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "sendmsg packet"); + if (ret != sizeof(vh) + sizeof(eth) + len) + error(1, errno, "sendmsg packet: %u", ret); +} + +static int recv_prepare_udp(void) +{ + int fd; + + fd = socket(cfg_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF r"); + + if (cfg_family == PF_INET6) { + if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "bind r"); + } else { + if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "bind r"); + } + + return fd; +} + +/* Filter out all traffic that is not cfg_proto with our destination port. + * + * Otherwise background noise may cause PF_PACKET receive queue overflow, + * dropping the expected packets and failing the test. + */ +static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static void recv_prepare_packet_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + + if (cfg_family == AF_INET) + __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol), + sizeof(struct iphdr) + off_dport); + else + __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr), + sizeof(struct ipv6hdr) + off_dport); +} + +static void recv_prepare_packet_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + + if (cfg_family == PF_INET) + laddr.sll_protocol = htons(ETH_P_IP); + else + laddr.sll_protocol = htons(ETH_P_IPV6); + + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, 0, "if_nametoindex %s", cfg_ifname); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind pf_packet"); +} + +static int recv_prepare_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF p"); + + /* enable auxdata to recv checksum status (valid vs unknown) */ + if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one))) + error(1, errno, "setsockopt auxdata"); + + /* install filter to restrict packet flow to match */ + recv_prepare_packet_filter(fd); + + /* bind to address family to start packet flow */ + recv_prepare_packet_bind(fd); + + return fd; +} + +static int recv_udp(int fd) +{ + static char buf[MAX_PAYLOAD_LEN]; + int ret, count = 0; + + while (1) { + ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv r"); + + fprintf(stderr, "rx: udp: len=%u\n", ret); + count++; + } + + return count; +} + +static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field) +{ + uint16_t csum; + + csum = checksum(th, cfg_proto, len); + + fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n", + sport, len, csum_field, csum); + + /* csum must be zero unless cfg_bad_csum indicates bad csum */ + if (csum && !cfg_bad_csum) { + fprintf(stderr, "pkt: bad csum\n"); + return 1; + } else if (cfg_bad_csum && !csum) { + fprintf(stderr, "pkt: good csum, while bad expected\n"); + return 1; + } + + if (cfg_zero_sum && csum_field != 0xFFFF) { + fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field); + return 1; + } + + return 0; +} + +static int recv_verify_packet_tcp(void *th, int len) +{ + struct tcphdr *tcph = th; + + if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst)) + return -1; + + return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check); +} + +static int recv_verify_packet_udp_encap(void *th, int len) +{ + struct udp_encap_hdr *eh = th; + + if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP) + return -1; + + return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh)); +} + +static int recv_verify_packet_udp(void *th, int len) +{ + struct udphdr *udph = th; + + if (len < sizeof(*udph)) + return -1; + + if (udph->dest != htons(cfg_port_dst)) + return -1; + + if (udph->source == htons(cfg_port_src_encap)) + return recv_verify_packet_udp_encap(udph + 1, + len - sizeof(*udph)); + + return recv_verify_csum(th, len, ntohs(udph->source), udph->check); +} + +static int recv_verify_packet_ipv4(void *nh, int len) +{ + struct iphdr *iph = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*iph) || iph->protocol != proto) + return -1; + + iph_addr_p = &iph->saddr; + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); + else + return recv_verify_packet_udp(iph + 1, len - sizeof(*iph)); +} + +static int recv_verify_packet_ipv6(void *nh, int len) +{ + struct ipv6hdr *ip6h = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) + return -1; + + iph_addr_p = &ip6h->saddr; + + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + else + return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); +} + +/* return whether auxdata includes TP_STATUS_CSUM_VALID */ +static bool recv_verify_packet_csum(struct msghdr *msg) +{ + struct tpacket_auxdata *aux = NULL; + struct cmsghdr *cm; + + if (msg->msg_flags & MSG_CTRUNC) + error(1, 0, "cmsg: truncated"); + + for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level != SOL_PACKET || + cm->cmsg_type != PACKET_AUXDATA) + error(1, 0, "cmsg: level=%d type=%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) + error(1, 0, "cmsg: len=%lu expected=%lu", + cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); + + aux = (void *)CMSG_DATA(cm); + } + + if (!aux) + error(1, 0, "cmsg: no auxdata"); + + return aux->tp_status & TP_STATUS_CSUM_VALID; +} + +static int recv_packet(int fd) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + unsigned long total = 0, bad_csums = 0, bad_validations = 0; + char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; + struct pkt *buf = (void *)_buf; + struct msghdr msg = {0}; + struct iovec iov; + int len, ret; + + iov.iov_base = _buf; + iov.iov_len = sizeof(_buf); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + while (1) { + msg.msg_flags = 0; + + len = recvmsg(fd, &msg, MSG_DONTWAIT); + if (len == -1 && errno == EAGAIN) + break; + if (len == -1) + error(1, errno, "recv p"); + + if (cfg_family == PF_INET6) + ret = recv_verify_packet_ipv6(buf, len); + else + ret = recv_verify_packet_ipv4(buf, len); + + if (ret == -1 /* skip: non-matching */) + continue; + + total++; + if (ret == 1) + bad_csums++; + + /* Fail if kernel returns valid for known bad csum. + * Do not fail if kernel does not validate a good csum: + * Absence of validation does not imply invalid. + */ + if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); + bad_validations++; + } + } + + if (bad_csums || bad_validations) + error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n", + total, bad_csums, bad_validations); + + return total; +} + +static void parse_args(int argc, char *const argv[]) +{ + const char *daddr = NULL, *saddr = NULL; + int c; + + while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + break; + case '6': + cfg_family = PF_INET6; + break; + case 'd': + cfg_mac_dst = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'e': + cfg_encap = true; + break; + case 'E': + cfg_bad_csum = true; + break; + case 'i': + cfg_ifname = optarg; + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'L': + cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000; + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'r': + cfg_random_seed = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_send_pfpacket = true; + break; + case 'R': + /* only Rx: used with two machine tests */ + cfg_do_tx = false; + break; + case 's': + cfg_mac_src = optarg; + break; + case 'S': + saddr = optarg; + break; + case 't': + cfg_proto = IPPROTO_TCP; + break; + case 'T': + /* only Tx: used with two machine tests */ + cfg_do_rx = false; + break; + case 'u': + cfg_proto = IPPROTO_UDP; + break; + case 'U': + /* send using real udp socket, + * to exercise tx checksum offload + */ + cfg_send_udp = true; + break; + case 'z': + cfg_zero_disable = true; + break; + case 'Z': + cfg_zero_sum = true; + break; + default: + error(1, 0, "unknown arg %c", c); + } + } + + if (!daddr || !saddr) + error(1, 0, "Must pass -D <daddr> and -S <saddr>"); + + if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst)) + error(1, 0, "Transmit with pf_packet requires mac addresses"); + + if (cfg_payload_len > MAX_PAYLOAD_LEN) + error(1, 0, "Payload length exceeds max"); + + if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable)) + error(1, 0, "Only UDP supports zero csum"); + + if (cfg_zero_sum && !cfg_send_udp) + error(1, 0, "Zero checksum conversion requires -U for tx csum offload"); + if (cfg_zero_sum && cfg_bad_csum) + error(1, 0, "Cannot combine zero checksum conversion and invalid checksum"); + if (cfg_zero_sum && cfg_random_seed) + error(1, 0, "Cannot combine zero checksum conversion with randomization"); + + if (cfg_family == PF_INET6) { + cfg_saddr6.sin6_port = htons(cfg_port_src); + cfg_daddr6.sin6_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -S"); + } else { + cfg_saddr4.sin_port = htons(cfg_port_src); + cfg_daddr4.sin_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -S"); + } + + if (cfg_do_tx && cfg_random_seed) { + /* special case: time-based seed */ + if (cfg_random_seed == 1) + cfg_random_seed = (unsigned int)gettimeofday_ms(); + srand(cfg_random_seed); + fprintf(stderr, "randomization seed: %u\n", cfg_random_seed); + } +} + +static void do_tx(void) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + char *buf; + int fd, len, i; + + buf = build_packet(_buf, sizeof(_buf), &len); + + if (cfg_send_pfpacket) + fd = open_packet(); + else if (cfg_send_udp) + fd = open_inet(SOCK_DGRAM, 0); + else + fd = open_inet(SOCK_RAW, IPPROTO_RAW); + + for (i = 0; i < cfg_num_pkt; i++) { + if (cfg_send_pfpacket) + send_packet(fd, buf, len); + else + send_inet(fd, buf, len); + + /* randomize each packet individually to increase coverage */ + if (cfg_random_seed) { + cfg_payload_len = rand() % MAX_PAYLOAD_LEN; + buf = build_packet(_buf, sizeof(_buf), &len); + } + } + + if (close(fd)) + error(1, errno, "close tx"); +} + +static void do_rx(int fdp, int fdr) +{ + unsigned long count_udp = 0, count_pkt = 0; + long tleft, tstop; + struct pollfd pfd; + + tstop = gettimeofday_ms() + cfg_timeout_ms; + tleft = cfg_timeout_ms; + + do { + pfd.events = POLLIN; + pfd.fd = fdp; + if (poll(&pfd, 1, tleft) == -1) + error(1, errno, "poll"); + + if (pfd.revents & POLLIN) + count_pkt += recv_packet(fdp); + + if (cfg_proto == IPPROTO_UDP) + count_udp += recv_udp(fdr); + + tleft = tstop - gettimeofday_ms(); + } while (tleft > 0); + + if (close(fdr)) + error(1, errno, "close r"); + if (close(fdp)) + error(1, errno, "close p"); + + if (count_pkt < cfg_num_pkt) + error(1, 0, "rx: missing packets at pf_packet: %lu < %u", + count_pkt, cfg_num_pkt); + + if (cfg_proto == IPPROTO_UDP) { + if (cfg_bad_csum && count_udp) + error(1, 0, "rx: unexpected packets at udp"); + if (!cfg_bad_csum && !count_udp) + error(1, 0, "rx: missing packets at udp"); + } +} + +int main(int argc, char *const argv[]) +{ + int fdp = -1, fdr = -1; /* -1 to silence -Wmaybe-uninitialized */ + + parse_args(argc, argv); + + /* open receive sockets before transmitting */ + if (cfg_do_rx) { + fdp = recv_prepare_packet(); + fdr = recv_prepare_udp(); + } + + if (cfg_do_tx) + do_tx(); + + if (cfg_do_rx) + do_rx(fdp, fdr); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 31c3b6ebd388..21ca91473c09 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4196,10 +4196,13 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -which nettest >/dev/null -if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit $ksft_skip + fi fi declare -i nfail=0 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ee5e98204d3d..a47b26ab48f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1228,6 +1228,17 @@ ipv4_fcnal() run_cmd "$IP ro add 172.16.101.0/24 nhid 21" run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" log_test $? 2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 2271a8727f62..5637b5dadabd 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1711,13 +1711,21 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy1 172.16.104.12/24 + $IP addr add dev dummy1 172.16.104.13/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 + $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e # removing address from device in vrf should only remove route from vrf table + echo " Regular FIB info" + $IP addr del dev dummy2 172.16.104.11/24 $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 1 "Route removed from VRF when source address deleted" @@ -1735,6 +1743,35 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in vrf should only remove route from vrf + # table even when the associated fib info only differs in table ID + echo " Identical FIB info with different table ID" + + $IP addr del dev dummy2 172.16.104.12/24 + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.12/24 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + + $IP addr del dev dummy1 172.16.104.12/24 + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 0 "Route in VRF is not removed by address delete" + + # removing address from device in default vrf should remove route from + # the default vrf even when route was inserted with a table ID of 0. + echo " Table ID 0" + + $IP addr del dev dummy1 172.16.104.13/24 + $IP ro ls | grep -q 172.16.107.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + $IP li del dummy1 $IP li del dummy2 cleanup diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index a9c5c1be5088..453ae006fbcf 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -3,6 +3,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ + bridge_mdb_host.sh \ bridge_mdb_port_down.sh \ bridge_mld.sh \ bridge_port_isolation.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 1162836f8f32..2aa66d2a1702 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -96,9 +96,6 @@ cleanup() switch_destroy - # Always cleanup the mcast group - ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null - h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh index 5b02b6b60ce7..dc92d32464f6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh +++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh @@ -1,7 +1,16 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="locked_port_ipv4 locked_port_ipv6 locked_port_vlan" +ALL_TESTS=" + locked_port_ipv4 + locked_port_ipv6 + locked_port_vlan + locked_port_mab + locked_port_mab_roam + locked_port_mab_config + locked_port_mab_flush +" + NUM_NETIFS=4 CHECK_TC="no" source lib.sh @@ -166,6 +175,150 @@ locked_port_ipv6() log_test "Locked port ipv6" } +locked_port_mab() +{ + RET=0 + check_port_mab_support || return 0 + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work before locking port" + + bridge link set dev $swp1 learning on locked on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on a locked port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 &> /dev/null + check_fail $? "FDB entry created before enabling MAB" + + bridge link set dev $swp1 learning on locked on mab on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on MAB enabled port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "Locked FDB entry not created" + + bridge fdb replace `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after replacing FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_fail $? "FDB entry marked as locked after replacement" + + bridge fdb del `mac_get $h1` dev $swp1 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB" +} + +# Check that entries cannot roam to a locked port, but that entries can roam +# to an unlocked port. +locked_port_mab_roam() +{ + local mac=a0:b0:c0:c0:b0:a0 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "No locked entry on first injection" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp2" + check_err $? "Entry did not roam to an unlocked port" + + bridge fdb get $mac br br0 vlan 1 | grep -q "locked" + check_fail $? "Entry roamed with locked flag on" + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp1" + check_fail $? "Entry roamed back to locked port" + + bridge fdb del $mac vlan 1 dev $swp2 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB roam" +} + +# Check that MAB can only be enabled on a port that is both locked and has +# learning enabled. +locked_port_mab_config() +{ + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked off mab on &> /dev/null + check_fail $? "MAB enabled while port is unlocked" + + bridge link set dev $swp1 learning off locked on mab on &> /dev/null + check_fail $? "MAB enabled while port has learning disabled" + + bridge link set dev $swp1 learning on locked on mab on + check_err $? "Failed to enable MAB when port is locked and has learning enabled" + + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB configuration" +} + +# Check that locked FDB entries are flushed from a port when MAB is disabled. +locked_port_mab_flush() +{ + local locked_mac1=00:01:02:03:04:05 + local unlocked_mac1=00:01:02:03:04:06 + local locked_mac2=00:01:02:03:04:07 + local unlocked_mac2=00:01:02:03:04:08 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + bridge link set dev $swp2 learning on locked on mab on + + # Create regular and locked FDB entries on each port. + bridge fdb add $unlocked_mac1 dev $swp1 vlan 1 master static + bridge fdb add $unlocked_mac2 dev $swp2 vlan 1 master static + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $locked_mac1 -b rand + bridge fdb get $locked_mac1 br br0 vlan 1 | grep "dev $swp1" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on first port" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $locked_mac2 -b rand + bridge fdb get $locked_mac2 br br0 vlan 1 | grep "dev $swp2" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on second port" + + # Disable MAB on the first port and check that only the first locked + # FDB entry was flushed. + bridge link set dev $swp1 mab off + + bridge fdb get $unlocked_mac1 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on first port was flushed after disabling MAB" + + bridge fdb get $unlocked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on second port was flushed after disabling MAB" + + bridge fdb get $locked_mac1 br br0 vlan 1 &> /dev/null + check_fail $? "Locked FDB entry on first port was not flushed after disabling MAB" + + bridge fdb get $locked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Locked FDB entry on second port was flushed after disabling MAB" + + bridge fdb del $unlocked_mac2 dev $swp2 vlan 1 master static + bridge fdb del $unlocked_mac1 dev $swp1 vlan 1 master static + + bridge link set dev $swp2 learning on locked off mab off + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB FDB flush" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index b1ba6876dd86..2fa5973c0c28 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -1,42 +1,107 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# -# Verify that adding host mdb entries work as intended for all types of -# multicast filters: ipv4, ipv6, and mac -ALL_TESTS="mdb_add_del_test" -NUM_NETIFS=2 +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR0 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ -TEST_GROUP_IP4="225.1.2.3" -TEST_GROUP_IP6="ff02::42" -TEST_GROUP_MAC="01:00:01:c0:ff:ee" +ALL_TESTS=" + cfg_test + fwd_test + ctrl_test +" +NUM_NETIFS=4 source lib.sh +source tc_common.sh h1_create() { - simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 } h1_destroy() { - simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 + vlan_destroy $h1 20 + vlan_destroy $h1 10 + simple_if_fini $h1 } -switch_create() +h2_create() { - # Enable multicast filtering - ip link add dev br0 type bridge mcast_snooping 1 + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} - ip link set dev $swp1 master br0 +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + simple_if_fini $h2 +} +switch_create() +{ + ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 + bridge vlan add vid 10 dev br0 self + bridge vlan add vid 20 dev br0 self ip link set dev br0 up + + ip link set dev $swp1 master br0 ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + tc qdisc add dev br0 clsact + tc qdisc add dev $h2 clsact } switch_destroy() { + tc qdisc del dev $h2 clsact + tc qdisc del dev br0 clsact + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + bridge vlan del vid 20 dev br0 self + bridge vlan del vid 10 dev br0 self ip link del dev br0 } @@ -45,9 +110,14 @@ setup_prepare() h1=${NETIFS[p1]} swp1=${NETIFS[p2]} + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + vrf_prepare + forwarding_enable h1_create + h2_create switch_create } @@ -56,48 +126,1039 @@ cleanup() pre_cleanup switch_destroy + h2_destroy h1_destroy + forwarding_restore vrf_cleanup } -do_mdb_add_del() +cfg_test_host_common() +{ + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local state=$1; shift + local invalid_state=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port br0 grp $grp $state vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp" + check_err $? "Failed to add $name host entry" + + bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null + check_fail $? "Managed to replace $name host entry" + + bridge mdb del dev br0 port br0 grp $grp $state vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp" + check_fail $? "Failed to delete $name host entry" + + # Check error cases. + bridge mdb add dev br0 port br0 grp $grp $invalid_state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a $invalid_state state" + + bridge mdb add dev br0 port br0 grp $grp src $src $state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a source" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + filter_mode exclude &> /dev/null + check_fail $? "Managed to add $name host entry with a filter mode" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add $name host entry with a source list" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + proto 123 &> /dev/null + check_fail $? "Managed to add $name host entry with a protocol" + + log_test "Common host entries configuration tests ($name)" +} + +# Check configuration of host entries from all types. +cfg_test_host() +{ + echo + log_info "# Host entries configuration tests" + + cfg_test_host_common "IPv4" "239.1.1.1" "192.0.2.1" "temp" "permanent" + cfg_test_host_common "IPv6" "ff0e::1" "2001:db8:1::1" "temp" "permanent" + cfg_test_host_common "L2" "01:02:03:04:05:06" "00:00:00:00:00:01" \ + "permanent" "temp" +} + +cfg_test_port_common() +{ + local name=$1;shift + local grp_key=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_err $? "Failed to add $name entry" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_err $? "Failed to replace $name entry" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_fail $? "Failed to delete $name entry" + + # Check default protocol and replacement. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "static" + check_err $? "$name entry not added with default \"static\" protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + proto 123 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "123" + check_err $? "Failed to replace protocol of $name entry" + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + # Check behavior when VLAN is not specified. + bridge mdb add dev br0 port $swp1 $grp_key permanent + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_err $? "$name entry with VLAN 10 not added when VLAN was not specified" + bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + check_err $? "$name entry with VLAN 20 not added when VLAN was not specified" + + bridge mdb del dev br0 port $swp1 $grp_key permanent + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified" + bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified" + + # Check behavior when bridge port is down. + ip link set dev $swp1 down + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + check_err $? "Failed to add $name permanent entry when bridge port is down" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 &> /dev/null + check_fail $? "Managed to add $name temporary entry when bridge port is down" + + ip link set dev $swp1 up + setup_wait_dev $swp1 + + # Check error cases. + ip link set dev br0 down + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name entry when bridge is down" + ip link set dev br0 up + + ip link set dev br0 type bridge mcast_snooping 0 + bridge mdb add dev br0 port $swp1 $grp_key permanent vid \ + 10 &> /dev/null + check_fail $? "Managed to add $name entry when multicast snooping is disabled" + ip link set dev br0 type bridge mcast_snooping 1 + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 5000 \ + &> /dev/null + check_fail $? "Managed to add $name entry with an invalid VLAN" + + log_test "Common port group entries configuration tests ($name)" +} + +src_list_create() +{ + local src_prefix=$1; shift + local num_srcs=$1; shift + local src_list + local i + + for i in $(seq 1 $num_srcs); do + src_list=${src_list},${src_prefix}${i} + done + + echo $src_list | cut -c 2- +} + +__cfg_test_port_ip_star_g() +{ + local name=$1; shift + local grp=$1; shift + local invalid_grp=$1; shift + local src_prefix=$1; shift + local src1=${src_prefix}1 + local src2=${src_prefix}2 + local src3=${src_prefix}3 + local max_srcs=31 + local num_srcs + + RET=0 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "exclude" + check_err $? "Default filter mode is not \"exclude\"" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check basic add and delete behavior. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + check_err $? "(*, G) entry not created" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry not created" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + check_fail $? "(*, G) entry not deleted" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_fail $? "(S, G) entry not deleted" + + ## State (permanent / temp) tests. + + # Check that group and source timer are not set for permanent entries. + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "permanent" + check_err $? "(*, G) entry not added as \"permanent\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "permanent" + check_err $? "(S, G) entry not added as \"permanent\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_err $? "(*, G) \"permanent\" entry has a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_err $? "\"permanent\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is set for temporary (*, G) EXCLUDE, but not + # the source timer. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_err $? "\"blocked\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is not set for temporary (*, G) INCLUDE, but + # that the source timer is set. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) entry not added as \"temp\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_err $? "(*, G) INCLUDE entry has a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_fail $? "Source entry does not have a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is never set for (S, G) entries. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q " 0.00" + check_err $? "(S, G) entry has a pending group timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Filter mode (include / exclude) tests. + + # Check that (*, G) INCLUDE entries are added with correct filter mode + # and that (S, G) entries are not marked as "blocked". + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode include source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "include" + check_err $? "(*, G) INCLUDE not added with \"include\" filter mode" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_fail $? "(S, G) entry marked as \"blocked\" when should not" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that (*, G) EXCLUDE entries are added with correct filter mode + # and that (S, G) entries are marked as "blocked". + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "exclude" + check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_err $? "(S, G) entry not marked as \"blocked\" when should" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Protocol tests. + + # Check that (*, G) and (S, G) entries are added with the specified + # protocol. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode exclude source_list $src1 proto zebra + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "zebra" + check_err $? "(*, G) entry not added with \"zebra\" protocol" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "zebra" + check_err $? "(S, G) entry not marked added with \"zebra\" protocol" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Replace tests. + + # Check that state can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "permanent" + check_err $? "(*, G) entry not marked as \"permanent\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "permanent" + check_err $? "(S, G) entry not marked as \"permanent\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) entry not marked as \"temp\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) entry not marked as \"temp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that filter mode can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "include" + check_err $? "(*, G) not marked with \"include\" filter mode after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_fail $? "(S, G) marked as \"blocked\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "exclude" + check_err $? "(*, G) not marked with \"exclude\" filter mode after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_err $? "(S, G) not marked as \"blocked\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that sources can be added to and removed from the source list. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src2,$src3 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry for source $src1 not created after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + check_err $? "(S, G) entry for source $src2 not created after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + check_err $? "(S, G) entry for source $src3 not created after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src3 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry for source $src1 not created after second replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + check_fail $? "(S, G) entry for source $src2 created after second replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + check_err $? "(S, G) entry for source $src3 not created after second replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that protocol can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto zebra + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto bgp + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "bgp" + check_err $? "(*, G) protocol not changed to \"bgp\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "bgp" + check_err $? "(S, G) protocol not changed to \"bgp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Star exclude tests. + + # Check star exclude functionality. When adding a new EXCLUDE (*, G), + # it needs to be also added to all (S, G) entries for proper + # replication. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode include source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$swp1" | grep "$grp" | \ + grep "$src1" | grep -q "added_by_star_ex" + check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10 + + ## Error cases tests. + + bridge mdb add dev br0 port $swp1 grp $invalid_grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid group" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + &> /dev/null + check_fail $? "Managed to add an INCLUDE entry with an empty source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $grp &> /dev/null + check_fail $? "Managed to add an entry with an invalid source in source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list and no filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp src $src2 vid 10 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1,$src2,$src3 &> /dev/null + check_fail $? "Managed to add a source that already has a forwarding entry" + bridge mdb del dev br0 port $swp1 grp $grp src $src2 vid 10 + + # Check maximum number of sources. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $max_srcs) + num_srcs=$(bridge -d mdb show dev br0 vid 10 | grep "$grp" | \ + grep "src" | wc -l) + [[ $num_srcs -eq $max_srcs ]] + check_err $? "Failed to configure maximum number of sources ($max_srcs)" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $((max_srcs + 1))) \ + &> /dev/null + check_fail $? "Managed to exceed maximum number of sources ($max_srcs)" + + log_test "$name (*, G) port group entries configuration tests" +} + +cfg_test_port_ip_star_g() +{ + echo + log_info "# Port group entries configuration tests - (*, G)" + + cfg_test_port_common "IPv4 (*, G)" "grp 239.1.1.1" + cfg_test_port_common "IPv6 (*, G)" "grp ff0e::1" + __cfg_test_port_ip_star_g "IPv4" "239.1.1.1" "224.0.0.1" "192.0.2." + __cfg_test_port_ip_star_g "IPv6" "ff0e::1" "ff02::1" "2001:db8:1::" +} + +__cfg_test_port_ip_sg() +{ + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local grp_key="grp $grp src $src" + + RET=0 + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "include" + check_err $? "Default filter mode is not \"include\"" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that entries can be added as both permanent and temp and that + # group timer is set correctly. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "permanent" + check_err $? "Entry not added as \"permanent\" when should" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_err $? "\"permanent\" entry has a pending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "temp" + check_err $? "Entry not added as \"temp\" when should" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_fail $? "\"temp\" entry has an unpending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check error cases. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + bridge mdb add dev br0 port $swp1 grp $grp src $grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid source" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 temp + bridge mdb add dev br0 port $swp1 $grp_key vid 10 permanent &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that we can replace available attributes. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123 + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "111" + check_err $? "Failed to replace protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "permanent" + check_err $? "Entry not marked as \"permanent\" after replace" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_err $? "Entry has a pending group timer after replace" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "temp" + check_err $? "Entry not marked as \"temp\" after replace" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_fail $? "Entry has an unpending group timer after replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check star exclude functionality. When adding a (S, G), all matching + # (*, G) ports need to be added to it. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge mdb show dev br0 vid 10 | grep "$grp_key" | grep $swp2 | \ + grep -q "added_by_star_ex" + check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + log_test "$name (S, G) port group entries configuration tests" +} + +cfg_test_port_ip_sg() +{ + echo + log_info "# Port group entries configuration tests - (S, G)" + + cfg_test_port_common "IPv4 (S, G)" "grp 239.1.1.1 src 192.0.2.1" + cfg_test_port_common "IPv6 (S, G)" "grp ff0e::1 src 2001:db8:1::1" + __cfg_test_port_ip_sg "IPv4" "239.1.1.1" "192.0.2.1" + __cfg_test_port_ip_sg "IPv6" "ff0e::1" "2001:db8:1::1" +} + +cfg_test_port_ip() +{ + cfg_test_port_ip_star_g + cfg_test_port_ip_sg +} + +__cfg_test_port_l2() +{ + local grp="01:02:03:04:05:06" + + RET=0 + + bridge meb add dev br0 port $swp grp 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with unicast MAC" + + bridge mdb add dev br0 port $swp grp $grp src 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with a source" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + source_list 00:01:02:03:04:05 &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + log_test "L2 (*, G) port group entries configuration tests" +} + +cfg_test_port_l2() +{ + echo + log_info "# Port group entries configuration tests - L2" + + cfg_test_port_common "L2 (*, G)" "grp 01:02:03:04:05:06" + __cfg_test_port_l2 +} + +# Check configuration of regular (port) entries of all types. +cfg_test_port() +{ + cfg_test_port_ip + cfg_test_port_l2 +} + +cfg_test() { - local group=$1 - local flag=$2 + cfg_test_host + cfg_test_port +} + +__fwd_test_host_ip() +{ + local grp=$1; shift + local src=$1; shift + local mode=$1; shift + local name + local eth_type RET=0 - bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null - check_err $? "Failed adding $group to br0, port br0" - if [ -z "$flag" ]; then - flag="temp" + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" fi - bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null - check_err $? "$group not added with $flag flag" + tc filter add dev br0 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp src_ip $src \ + action drop + + # Packet should only be flooded to multicast router ports when there is + # no matching MDB entry. The bridge is not configured as a multicast + # router port. + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after flood" + + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $grp temp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after installing a regular entry" + + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $grp temp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $grp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + tc filter del dev br0 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name host entries forwarding tests" +} + +fwd_test_host_ip() +{ + __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4" + __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6" +} + +fwd_test_host_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev br0 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + # Packet should be flooded and locally received when there is no + # matching MDB entry. + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after flood" + + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet locally received after installing a regular entry" + + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + + tc filter del dev br0 ingress protocol all pref 1 handle 1 flower + + log_test "L2 host entries forwarding tests" +} + +fwd_test_host() +{ + # Disable multicast router on the bridge to ensure that packets are + # only locally received when a matching host entry is present. + ip link set dev br0 type bridge mcast_router 0 + + fwd_test_host_ip + fwd_test_host_l2 + + ip link set dev br0 type bridge mcast_router 1 +} + +__fwd_test_port_ip() +{ + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mode=$1; shift + local filter_mode=$1; shift + local name + local eth_type + local src_list + + RET=0 + + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" + fi + + # The valid source is the one we expect to get packets from after + # adding the entry. + if [[ $filter_mode == "include" ]]; then + src_list=$valid_src + else + src_list=$invalid_src + fi + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $valid_src action drop + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 2 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $invalid_src action drop + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet from valid source received on H2 before adding entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? "Packet from invalid source received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode $filter_mode source_list $src_list - bridge mdb del dev br0 port br0 grp $group 2>/dev/null - check_err $? "Failed deleting $group from br0, port br0" + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet from valid source not received on H2 after adding entry" - bridge mdb show dev br0 | grep -q $group >/dev/null - check_err_fail 1 $? "$group still in mdb after delete" + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? "Packet from invalid source received on H2 after adding entry" - log_test "MDB add/del group $group to bridge port br0" + bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \ + filter_mode exclude + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source not received on H2 after allowing all sources" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source not received on H2 after allowing all sources" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source received on H2 after deleting entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 2 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name port group \"$filter_mode\" entries forwarding tests" +} + +fwd_test_port_ip() +{ + __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude" + __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "exclude" + __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include" + __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "include" +} + +fwd_test_port_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev $h2 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet not received on H2 after adding entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol all pref 1 handle 1 flower + + log_test "L2 port entries forwarding tests" +} + +fwd_test_port() +{ + # Disable multicast flooding to ensure that packets are only forwarded + # out of a port when a matching port group entry is present. + bridge link set dev $swp2 mcast_flood off + + fwd_test_port_ip + fwd_test_port_l2 + + bridge link set dev $swp2 mcast_flood on +} + +fwd_test() +{ + echo + log_info "# Forwarding tests" + + # Forwarding according to MDB entries only takes place when the bridge + # detects that there is a valid querier in the network. Set the bridge + # as the querier and assign it a valid IPv6 link-local address to be + # used as the source address for MLD queries. + ip -6 address add fe80::1/64 nodad dev br0 + ip link set dev br0 type bridge mcast_querier 1 + # Wait the default Query Response Interval (10 seconds) for the bridge + # to determine that there are no other queriers in the network. + sleep 10 + + fwd_test_host + fwd_test_port + + ip link set dev br0 type bridge mcast_querier 0 + ip -6 address del fe80::1/64 dev br0 } -mdb_add_del_test() +igmpv3_is_in_get() { - do_mdb_add_del $TEST_GROUP_MAC permanent - do_mdb_add_del $TEST_GROUP_IP4 - do_mdb_add_del $TEST_GROUP_IP6 + local igmpv3 + + igmpv3=$(: + )"22:"$( : Type - Membership Report + )"00:"$( : Reserved + )"2a:f8:"$( : Checksum + )"00:00:"$( : Reserved + )"00:01:"$( : Number of Group Records + )"01:"$( : Record Type - IS_IN + )"00:"$( : Aux Data Len + )"00:01:"$( : Number of Sources + )"ef:01:01:01:"$( : Multicast Address - 239.1.1.1 + )"c0:00:02:02"$( : Source Address - 192.0.2.2 + ) + + echo $igmpv3 +} + +ctrl_igmpv3_is_in_test() +{ + RET=0 + + # Add a permanent entry and check that it is not affected by the + # received IGMP packet. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 \ + filter_mode include source_list 192.0.2.1 + + # IS_IN ( 192.0.2.2 ) + $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ + -t ip proto=2,p=$(igmpv3_is_in_get) -q + + bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2 + check_fail $? "Permanent entry affected by IGMP packet" + + # Replace the permanent entry with a temporary one and check that after + # processing the IGMP packet, a new source is added to the list along + # with a new forwarding entry. + bridge mdb replace dev br0 port $swp1 grp 239.1.1.1 temp vid 10 \ + filter_mode include source_list 192.0.2.1 + + # IS_IN ( 192.0.2.2 ) + $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ + -t ip proto=2,p=$(igmpv3_is_in_get) -q + + bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \ + grep -q 192.0.2.2 + check_err $? "Source not add to source list" + + bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | \ + grep -q "src 192.0.2.2" + check_err $? "(S, G) entry not created for new source" + + bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10 + + log_test "IGMPv3 MODE_IS_INCLUE tests" +} + +mldv2_is_in_get() +{ + local hbh + local icmpv6 + + hbh=$(: + )"3a:"$( : Next Header - ICMPv6 + )"00:"$( : Hdr Ext Len + )"00:00:00:00:00:00:"$( : Options and Padding + ) + + icmpv6=$(: + )"8f:"$( : Type - MLDv2 Report + )"00:"$( : Code + )"45:39:"$( : Checksum + )"00:00:"$( : Reserved + )"00:01:"$( : Number of Group Records + )"01:"$( : Record Type - IS_IN + )"00:"$( : Aux Data Len + )"00:01:"$( : Number of Sources + )"ff:0e:00:00:00:00:00:00:"$( : Multicast address - ff0e::1 + )"00:00:00:00:00:00:00:01:"$( : + )"20:01:0d:b8:00:01:00:00:"$( : Source Address - 2001:db8:1::2 + )"00:00:00:00:00:00:00:02:"$( : + ) + + echo ${hbh}${icmpv6} +} + +ctrl_mldv2_is_in_test() +{ + RET=0 + + # Add a permanent entry and check that it is not affected by the + # received MLD packet. + bridge mdb add dev br0 port $swp1 grp ff0e::1 permanent vid 10 \ + filter_mode include source_list 2001:db8:1::1 + + # IS_IN ( 2001:db8:1::2 ) + $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \ + -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q + + bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \ + grep -q 2001:db8:1::2 + check_fail $? "Permanent entry affected by MLD packet" + + # Replace the permanent entry with a temporary one and check that after + # processing the MLD packet, a new source is added to the list along + # with a new forwarding entry. + bridge mdb replace dev br0 port $swp1 grp ff0e::1 temp vid 10 \ + filter_mode include source_list 2001:db8:1::1 + + # IS_IN ( 2001:db8:1::2 ) + $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \ + -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q + + bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \ + grep -q 2001:db8:1::2 + check_err $? "Source not add to source list" + + bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \ + grep -q "src 2001:db8:1::2" + check_err $? "(S, G) entry not created for new source" + + bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10 + + log_test "MLDv2 MODE_IS_INCLUDE tests" +} + +ctrl_test() +{ + echo + log_info "# Control packets tests" + + ctrl_igmpv3_is_in_test + ctrl_mldv2_is_in_test } trap cleanup EXIT setup_prepare setup_wait - tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh new file mode 100755 index 000000000000..b1ba6876dd86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that adding host mdb entries work as intended for all types of +# multicast filters: ipv4, ipv6, and mac + +ALL_TESTS="mdb_add_del_test" +NUM_NETIFS=2 + +TEST_GROUP_IP4="225.1.2.3" +TEST_GROUP_IP6="ff02::42" +TEST_GROUP_MAC="01:00:01:c0:ff:ee" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 + + ip link set dev $swp1 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +do_mdb_add_del() +{ + local group=$1 + local flag=$2 + + RET=0 + bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null + check_err $? "Failed adding $group to br0, port br0" + + if [ -z "$flag" ]; then + flag="temp" + fi + + bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null + check_err $? "$group not added with $flag flag" + + bridge mdb del dev br0 port br0 grp $group 2>/dev/null + check_err $? "Failed deleting $group from br0, port br0" + + bridge mdb show dev br0 | grep -q $group >/dev/null + check_err_fail 1 $? "$group still in mdb after delete" + + log_test "MDB add/del group $group to bridge port br0" +} + +mdb_add_del_test() +{ + do_mdb_add_del $TEST_GROUP_MAC permanent + do_mdb_add_del $TEST_GROUP_IP4 + do_mdb_add_del $TEST_GROUP_IP6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index 8748d1b1d95b..72dfbeaf56b9 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -59,6 +59,9 @@ switch_create() switch_destroy() { + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp1 down diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 601990c6881b..f1de525cfa55 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -503,25 +503,30 @@ devlink_trap_drop_cleanup() tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } -devlink_trap_stats_test() +devlink_trap_stats_check() { - local test_name=$1; shift local trap_name=$1; shift local send_one="$@" local t0_packets local t1_packets - RET=0 - t0_packets=$(devlink_trap_rx_packets_get $trap_name) $send_one && sleep 1 t1_packets=$(devlink_trap_rx_packets_get $trap_name) - if [[ $t1_packets -eq $t0_packets ]]; then - check_err 1 "Trap stats did not increase" - fi + [[ $t1_packets -ne $t0_packets ]] +} + +devlink_trap_stats_test() +{ + local test_name=$1; shift + + RET=0 + + devlink_trap_stats_check "$@" + check_err $? "Trap stats did not increase" log_test "$test_name" } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 3ffb9d6c0950..1c4f866de7d7 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -137,6 +137,14 @@ check_locked_port_support() fi } +check_port_mab_support() +{ + if ! bridge -d link show | grep -q "mab"; then + echo "SKIP: iproute2 too old; MacAuth feature not supported." + return $ksft_skip + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit $ksft_skip diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile new file mode 100644 index 000000000000..92c1d9d080cd --- /dev/null +++ b/tools/testing/selftests/net/hsr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +TEST_PROGS := hsr_ping.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config new file mode 100644 index 000000000000..22061204fb69 --- /dev/null +++ b/tools/testing/selftests/net/hsr/config @@ -0,0 +1,4 @@ +CONFIG_IPV6=y +CONFIG_NET_SCH_NETEM=m +CONFIG_HSR=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh new file mode 100755 index 000000000000..df9143538708 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +ksft_skip=4 +ipv6=true + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage $0 + exit 1 + ;; +esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" + +cleanup() +{ + local netns + for netns in "$ns1" "$ns2" "$ns3" ;do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +echo "INFO: preparing interfaces." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# +# Interfaces +ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" +ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" +ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + +# HSRv0. +ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 +ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 +ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + +# IP for HSR +ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 +ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad +ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 +ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad +ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 +ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + +# All Links up +ip -net "$ns1" link set ns1eth1 up +ip -net "$ns1" link set ns1eth2 up +ip -net "$ns1" link set hsr1 up + +ip -net "$ns2" link set ns2eth1 up +ip -net "$ns2" link set ns2eth2 up +ip -net "$ns2" link set hsr2 up + +ip -net "$ns3" link set ns3eth1 up +ip -net "$ns3" link set ns3eth2 up +ip -net "$ns3" link set hsr3 up + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 2" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + return 0 +} + +do_ping_long() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 10" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + VAL="$(echo $OUT | cut -d' ' -f1-8)" + if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ] + then + echo "$netns -> $connect_addr ping TEST [ FAIL ]" + echo "Expect to send and receive 10 packets and no duplicates." + echo "Full message: ${OUT}." + ret=1 + return 1 + fi + + return 0 +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + exit ${ret} + fi +} + + +echo "INFO: Initial validation ping." +# Each node has to be able each one. +do_ping "$ns1" 100.64.0.2 +do_ping "$ns2" 100.64.0.1 +do_ping "$ns3" 100.64.0.1 +stop_if_error "Initial validation failed." + +do_ping "$ns1" 100.64.0.3 +do_ping "$ns2" 100.64.0.3 +do_ping "$ns3" 100.64.0.2 + +do_ping "$ns1" dead:beef:1::2 +do_ping "$ns1" dead:beef:1::3 +do_ping "$ns2" dead:beef:1::1 +do_ping "$ns2" dead:beef:1::2 +do_ping "$ns3" dead:beef:1::1 +do_ping "$ns3" dead:beef:1::2 + +stop_if_error "Initial validation failed." + +# Wait until supervisor all supervision frames have been processed and the node +# entries have been merged. Otherwise duplicate frames will be observed which is +# valid at this stage. +WAIT=5 +while [ ${WAIT} -gt 0 ] +do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let WAIT = WAIT - 1 +done + +# Just a safety delay in case the above check didn't handle it. +sleep 1 + +echo "INFO: Longer ping test." +do_ping_long "$ns1" 100.64.0.2 +do_ping_long "$ns1" dead:beef:1::2 +do_ping_long "$ns1" 100.64.0.3 +do_ping_long "$ns1" dead:beef:1::3 + +stop_if_error "Longer ping test failed." + +do_ping_long "$ns2" 100.64.0.1 +do_ping_long "$ns2" dead:beef:1::1 +do_ping_long "$ns2" 100.64.0.3 +do_ping_long "$ns2" dead:beef:1::2 +stop_if_error "Longer ping test failed." + +do_ping_long "$ns3" 100.64.0.1 +do_ping_long "$ns3" dead:beef:1::1 +do_ping_long "$ns3" 100.64.0.2 +do_ping_long "$ns3" dead:beef:1::2 +stop_if_error "Longer ping test failed." + +echo "INFO: Cutting one link." +do_ping_long "$ns1" 100.64.0.3 & + +sleep 3 +ip -net "$ns3" link set ns3eth1 down +wait + +ip -net "$ns3" link set ns3eth1 up + +stop_if_error "Failed with one link down." + +echo "INFO: Delay the link and drop a few packages." +tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms +tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + +do_ping_long "$ns1" 100.64.0.2 +do_ping_long "$ns1" 100.64.0.3 + +stop_if_error "Failed with delay and packetloss." + +do_ping_long "$ns2" 100.64.0.1 +do_ping_long "$ns2" 100.64.0.3 + +stop_if_error "Failed with delay and packetloss." + +do_ping_long "$ns3" 100.64.0.1 +do_ping_long "$ns3" 100.64.0.2 +stop_if_error "Failed with delay and packetloss." + +echo "INFO: All good." +exit $ret diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 515859a5168b..24bcd7b9bdb2 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index e54653ea2ed4..8a8266957bc5 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -83,6 +83,7 @@ struct cfg_cmsg_types { struct cfg_sockopt_types { unsigned int transparent:1; + unsigned int mptfo:1; }; struct tcp_inq_state { @@ -90,6 +91,13 @@ struct tcp_inq_state { bool expect_eof; }; +struct wstate { + char buf[8192]; + unsigned int len; + unsigned int off; + unsigned int total_len; +}; + static struct tcp_inq_state tcp_inq; static struct cfg_cmsg_types cfg_cmsg_types; @@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf) } } +static void set_mptfo(int fd, int pf) +{ + int qlen = 25; + + if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) + perror("TCP_FASTOPEN"); +} + static int do_ulp_so(int sock, const char *name) { return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); @@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr, if (cfg_sockopt_types.transparent) set_transparent(sock, pf); + if (cfg_sockopt_types.mptfo) + set_mptfo(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr, static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto, - struct addrinfo **peer) + struct addrinfo **peer, + int infd, struct wstate *winfo) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; + int syn_copied = 0; int sock = -1; hints.ai_family = pf; @@ -354,14 +375,34 @@ static int sock_connect_mptcp(const char * const remoteaddr, if (cfg_mark) set_mark(sock, cfg_mark); - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { - *peer = a; - break; /* success */ + if (cfg_sockopt_types.mptfo) { + if (!winfo->total_len) + winfo->total_len = winfo->len = read(infd, winfo->buf, + sizeof(winfo->buf)); + + syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, + a->ai_addr, a->ai_addrlen); + if (syn_copied >= 0) { + winfo->off = syn_copied; + winfo->len -= syn_copied; + *peer = a; + break; /* success */ + } + } else { + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; + break; /* success */ + } + } + if (cfg_sockopt_types.mptfo) { + perror("sendto()"); + close(sock); + sock = -1; + } else { + perror("connect()"); + close(sock); + sock = -1; } - - perror("connect()"); - close(sock); - sock = -1; } freeaddrinfo(addr); @@ -571,14 +612,14 @@ static void shut_wr(int fd) shutdown(fd, SHUT_WR); } -static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) +static int copyfd_io_poll(int infd, int peerfd, int outfd, + bool *in_closed_after_out, struct wstate *winfo) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; - char wbuf[8192]; + unsigned int total_wlen = 0, total_rlen = 0; set_nonblock(peerfd, true); @@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after } if (fds.revents & POLLOUT) { - if (wlen == 0) { - woff = 0; - wlen = read(infd, wbuf, sizeof(wbuf)); + if (winfo->len == 0) { + winfo->off = 0; + winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); } - if (wlen > 0) { + if (winfo->len > 0) { ssize_t bw; /* limit the total amount of written data to the trunc value */ - if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate) - wlen = cfg_truncate - total_wlen; + if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) + winfo->len = cfg_truncate - total_wlen; - bw = do_rnd_write(peerfd, wbuf + woff, wlen); + bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { if (cfg_rcv_trunc) return 0; @@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after return 111; } - woff += bw; - wlen -= bw; + winfo->off += bw; + winfo->len -= bw; total_wlen += bw; - } else if (wlen == 0) { + } else if (winfo->len == 0) { /* We have no more data to send. */ fds.events &= ~POLLOUT; @@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd) return (int)r; } -static int do_mmap(int infd, int outfd, unsigned int size) +static int spool_buf(int fd, struct wstate *winfo) +{ + while (winfo->len) { + int ret = write(fd, winfo->buf + winfo->off, winfo->len); + + if (ret < 0) { + perror("write"); + return 4; + } + winfo->off += ret; + winfo->len -= ret; + } + return 0; +} + +static int do_mmap(int infd, int outfd, unsigned int size, + struct wstate *winfo) { char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); - ssize_t ret = 0, off = 0; + ssize_t ret = 0, off = winfo->total_len; size_t rem; if (inbuf == MAP_FAILED) { @@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size) return 1; } - rem = size; + ret = spool_buf(outfd, winfo); + if (ret < 0) + return ret; + + rem = size - winfo->total_len; while (rem > 0) { ret = write(outfd, inbuf + off, rem); @@ -772,8 +833,16 @@ static int get_infd_size(int fd) return (int)count; } -static int do_sendfile(int infd, int outfd, unsigned int count) +static int do_sendfile(int infd, int outfd, unsigned int count, + struct wstate *winfo) { + int ret = spool_buf(outfd, winfo); + + if (ret < 0) + return ret; + + count -= winfo->total_len; + while (count > 0) { ssize_t r; @@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, + struct wstate *winfo) { int err; @@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, if (err) return err; - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); } else { - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); if (err) return err; @@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; @@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, if (err) return err; - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); } else { - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); if (err) return err; @@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, return err; } -static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { bool in_closed_after_out = false; struct timespec start, end; @@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) switch (cfg_mode) { case CFG_MODE_POLL: - ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, + winfo); break; case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); break; case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); break; default: @@ -999,6 +1072,7 @@ static void maybe_close(int fd) int main_loop_s(int listensock) { struct sockaddr_storage ss; + struct wstate winfo; struct pollfd polls; socklen_t salen; int remotesock; @@ -1033,7 +1107,8 @@ again: SOCK_TEST_TCPULP(remotesock, 0); - copyfd_io(fd, remotesock, 1, true); + memset(&winfo, 0, sizeof(winfo)); + copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; @@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name) return; } + if (strncmp(name, "MPTFO", len) == 0) { + cfg_sockopt_types.mptfo = 1; + return; + } + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); exit(1); } @@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen) int main_loop(void) { - int fd, ret, fd_in = 0; + int fd = 0, ret, fd_in = 0; struct addrinfo *peer; + struct wstate winfo; + + if (cfg_input && cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } - /* listener is ready. */ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); + memset(&winfo, 0, sizeof(winfo)); + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); if (fd < 0) return 2; @@ -1186,14 +1273,13 @@ again: if (cfg_cmsg_types.cmsg_enabled) apply_cmsg_types(fd, &cfg_cmsg_types); - if (cfg_input) { + if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); if (fd < 0) xerror("can't open %s:%d", cfg_input, errno); } - /* close the client socket open only if we are not going to reconnect */ - ret = copyfd_io(fd_in, fd, 1, 0); + ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) return ret; @@ -1210,6 +1296,7 @@ again: xerror("can't reconnect: %d", errno); if (cfg_input) close(fd_in); + memset(&winfo, 0, sizeof(winfo)); goto again; } else { close(fd); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 621af6895f4d..a43d3e2f59bb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -274,8 +274,7 @@ check_transfer() check_mptcp_disabled() { - local disabled_ns - disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)" + local disabled_ns="ns_disabled-$rndh" ip netns add ${disabled_ns} || exit $ksft_skip # net.mptcp.enabled should be enabled by default @@ -762,9 +761,25 @@ run_tests_peekmode() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } +run_tests_mptfo() +{ + echo "INFO: with MPTFO start" + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 + + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0 + echo "INFO: with MPTFO end" +} + run_tests_disconnect() { - local peekmode="$1" local old_cin=$cin local old_sin=$sin @@ -772,7 +787,6 @@ run_tests_disconnect() # force do_transfer to cope with the multiple tranmissions sin="$cin.disconnect" - sin_disconnect=$old_sin cin="$cin.disconnect" cin_disconnect="$old_cin" connect_per_transfer=3 @@ -783,7 +797,6 @@ run_tests_disconnect() # restore previous status sin=$old_sin - sin_disconnect="$cout".disconnect cin=$old_cin cin_disconnect="$cin".disconnect connect_per_transfer=1 @@ -901,6 +914,10 @@ run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" stop_if_error "Tests with peek mode have failed" +# MPTFO (MultiPath TCP Fatopen tests) +run_tests_mptfo +stop_if_error "Tests with MPTFO have failed" + # connect to ns4 ip address, ns2 should intercept/proxy run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent dead:beef:3::1 "tproxy ipv6" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f3dd5f2a0272..d11d3d566608 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -26,6 +26,10 @@ ip_mptcp=0 check_invert=0 validate_checksum=0 init=0 +evts_ns1="" +evts_ns2="" +evts_ns1_pid=0 +evts_ns2_pid=0 declare -A all_tests declare -a only_tests_ids @@ -59,8 +63,9 @@ init_partial() { capout=$(mktemp) - local rndh - rndh=$(mktemp -u XXXXXX) + local sec rndh + sec=$(date +%s) + rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -153,6 +158,8 @@ init() { cin=$(mktemp) cinsent=$(mktemp) cout=$(mktemp) + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) trap cleanup EXIT @@ -164,6 +171,7 @@ cleanup() { rm -f "$cin" "$cout" "$sinfail" rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -rf $evts_ns1 $evts_ns2 cleanup_partial } @@ -319,6 +327,18 @@ reset_with_fail() index 100 || exit 1 } +reset_with_events() +{ + reset "${1}" || return 1 + + :> "$evts_ns1" + :> "$evts_ns2" + ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & + evts_ns1_pid=$! + ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & + evts_ns2_pid=$! +} + fail_test() { ret=1 @@ -472,6 +492,12 @@ kill_wait() wait $1 2>/dev/null } +kill_events_pids() +{ + kill_wait $evts_ns1_pid + kill_wait $evts_ns2_pid +} + pm_nl_set_limits() { local ns=$1 @@ -672,10 +698,6 @@ do_transfer() local port=$((10000 + TEST_COUNT - 1)) local cappid local userspace_pm=0 - local evts_ns1 - local evts_ns1_pid - local evts_ns2 - local evts_ns2_pid :> "$cout" :> "$sout" @@ -752,17 +774,6 @@ do_transfer() addr_nr_ns2=${addr_nr_ns2:9} fi - if [ $userspace_pm -eq 1 ]; then - evts_ns1=$(mktemp) - evts_ns2=$(mktemp) - :> "$evts_ns1" - :> "$evts_ns2" - ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & - evts_ns1_pid=$! - ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & - evts_ns2_pid=$! - fi - local local_addr if is_v6 "${connect_addr}"; then local_addr="::" @@ -829,7 +840,8 @@ do_transfer() if [ $userspace_pm -eq 0 ]; then pm_nl_add_endpoint $ns1 $addr flags signal else - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1") + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id @@ -980,12 +992,6 @@ do_transfer() kill $cappid fi - if [ $userspace_pm -eq 1 ]; then - kill_wait $evts_ns1_pid - kill_wait $evts_ns2_pid - rm -rf $evts_ns1 $evts_ns2 - fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ @@ -2152,7 +2158,7 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -2165,7 +2171,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert @@ -2178,7 +2184,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert @@ -2507,6 +2513,57 @@ backup_tests() fi } +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED + +AF_INET=2 +AF_INET6=10 + +verify_listener_events() +{ + local evt=$1 + local e_type=$2 + local e_family=$3 + local e_saddr=$4 + local e_sport=$5 + local type + local family + local saddr + local sport + + if [ $e_type = $LISTENER_CREATED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CREATE_LISTENER %s:%s"\ + $e_saddr $e_sport + elif [ $e_type = $LISTENER_CLOSED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CLOSE_LISTENER %s:%s "\ + $e_saddr $e_sport + fi + + type=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + if [ $family ] && [ $family = $AF_INET6 ]; then + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + else + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + fi + + if [ $type ] && [ $type = $e_type ] && + [ $family ] && [ $family = $e_family ] && + [ $saddr ] && [ $saddr = $e_saddr ] && + [ $sport ] && [ $sport = $e_sport ]; then + stdbuf -o0 -e0 printf "[ ok ]\n" + return 0 + fi + fail_test + stdbuf -o0 -e0 printf "[fail]\n" +} + add_addr_ports_tests() { # signal address with port @@ -2531,7 +2588,8 @@ add_addr_ports_tests() fi # single address with port, remove - if reset "remove single address with port"; then + # pm listener events + if reset_with_events "remove single address with port"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 @@ -2539,6 +2597,10 @@ add_addr_ports_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 1 chk_rm_nr 1 1 invert + + verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100 + kill_events_pids fi # subflow and signal with port, remove @@ -2959,22 +3021,24 @@ userspace_tests() fi # userspace pm add & remove address - if reset "userspace pm add & remove address"; then + if reset_with_events "userspace pm add & remove address"; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + kill_events_pids fi # userspace pm create destroy subflow - if reset "userspace pm create destroy subflow"; then + if reset_with_events "userspace pm create destroy subflow"; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow chk_join_nr 1 1 1 chk_rm_nr 0 1 + kill_events_pids fi } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 0879da915014..1b70c0a304ce 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -10,13 +10,19 @@ ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" -do_all_tests=1 + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns_sbox="ns_sbox-$rndh" add_mark_rules() { local ns=$1 local m=$2 + local t for t in iptables ip6tables; do # just to debug: check we have multiple subflows connection requests ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT @@ -31,12 +37,8 @@ add_mark_rules() init() { - rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) - - ns1="ns1-$rndh" - ns2="ns2-$rndh" - - for netns in "$ns1" "$ns2";do + local netns + for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 @@ -44,6 +46,7 @@ init() ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 done + local i for i in `seq 1 4`; do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i @@ -73,7 +76,8 @@ init() cleanup() { - for netns in "$ns1" "$ns2"; do + local netns + for netns in "$ns1" "$ns2" "$ns_sbox"; do ip netns del $netns done rm -f "$cin" "$cout" @@ -103,15 +107,17 @@ check_mark() local ns=$1 local af=$2 - tables=iptables + local tables=iptables if [ $af -eq 6 ];then tables=ip6tables fi + local counters values counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP) values=${counters%DROP*} + local v for v in $values; do if [ $v -ne 0 ]; then echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 @@ -131,9 +137,9 @@ print_file_err() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 cmp "$in" "$out" > /dev/null 2>&1 if [ $? -ne 0 ] ;then @@ -156,18 +162,18 @@ is_v6() do_transfer() { - listener_ns="$1" - connector_ns="$2" - cl_proto="$3" - srv_proto="$4" - connect_addr="$5" + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" - port=12001 + local port=12001 :> "$cout" :> "$sout" - mptcp_connect="./mptcp_connect -r 20" + local mptcp_connect="./mptcp_connect -r 20" local local_addr if is_v6 "${connect_addr}"; then @@ -180,7 +186,7 @@ do_transfer() ip netns exec ${listener_ns} \ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ ${local_addr} < "$sin" > "$sout" & - spid=$! + local spid=$! sleep 1 @@ -189,12 +195,12 @@ do_transfer() $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ $connect_addr < "$cin" > "$cout" & - cpid=$! + local cpid=$! wait $cpid - retc=$? + local retc=$? wait $spid - rets=$? + local rets=$? if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 @@ -229,9 +235,9 @@ do_transfer() make_file() { - name=$1 - who=$2 - size=$3 + local name=$1 + local who=$2 + local size=$3 dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" @@ -243,7 +249,7 @@ do_mptcp_sockopt_tests() { local lret=0 - ./mptcp_sockopt + ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? if [ $lret -ne 0 ]; then @@ -252,7 +258,7 @@ do_mptcp_sockopt_tests() return fi - ./mptcp_sockopt -6 + ip netns exec "$ns_sbox" ./mptcp_sockopt -6 lret=$? if [ $lret -ne 0 ]; then @@ -264,9 +270,9 @@ do_mptcp_sockopt_tests() run_tests() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" local lret=0 do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} @@ -281,8 +287,8 @@ run_tests() do_tcpinq_test() { - ip netns exec "$ns1" ./mptcp_inq "$@" - lret=$? + ip netns exec "$ns_sbox" ./mptcp_inq "$@" + local lret=$? if [ $lret -ne 0 ];then ret=$lret echo "FAIL: mptcp_inq $@" 1>&2 @@ -297,9 +303,7 @@ do_tcpinq_tests() { local lret=0 - ip netns exec "$ns1" iptables -F - ip netns exec "$ns1" ip6tables -F - + local args for args in "-t tcp" "-r tcp"; do do_tcpinq_test $args lret=$? diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ffa13a957a36..9f22f7e5027d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -148,9 +149,6 @@ do_transfer() :> "$sout" :> "$capout" - local addr_port - addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - if $capture; then local capuser if [ -z $SUDO_USER ] ; then @@ -173,7 +171,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $time \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ 0.0.0.0 < "$sin" > "$sout" & local spid=$! @@ -181,7 +179,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port -T $time \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ 10.0.3.3 < "$cin" > "$cout" & local cpid=$! @@ -247,9 +245,10 @@ run_test() tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 - # time is measured in ms, account for transfer size, affegated link speed + # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) - local time=$((size * 8 * 1000 * 10 / (( $rate1 + $rate2) * 1024 *1024 * 9) )) + # ms byte -> bit 10% mbit -> kbit -> bit 10% + local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) )) # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3229725b64b0..a29deb9fa024 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -11,11 +11,17 @@ ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED REMOVED=7 # MPTCP_EVENT_REMOVED SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED AF_INET=2 AF_INET6=10 -evts_pid=0 +file="" +server_evts="" +client_evts="" +server_evts_pid=0 +client_evts_pid=0 client4_pid=0 server4_pid=0 client6_pid=0 @@ -33,7 +39,7 @@ client_addr_id=${RANDOM:0:2} server_addr_id=${RANDOM:0:2} sec=$(date +%s) -rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX) +rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -47,7 +53,7 @@ cleanup() { echo "cleanup" - rm -rf $file + rm -rf $file $client_evts $server_evts # Terminate the MPTCP connection and related processes if [ $client4_pid -ne 0 ]; then @@ -62,8 +68,11 @@ cleanup() if [ $server6_pid -ne 0 ]; then kill_wait $server6_pid fi - if [ $evts_pid -ne 0 ]; then - kill_wait $evts_pid + if [ $server_evts_pid -ne 0 ]; then + kill_wait $server_evts_pid + fi + if [ $client_evts_pid -ne 0 ]; then + kill_wait $client_evts_pid fi local netns for netns in "$ns1" "$ns2" ;do @@ -113,8 +122,9 @@ make_file() make_connection() { - local file - file=$(mktemp) + if [ -z "$file" ]; then + file=$(mktemp) + fi make_file "$file" "client" local is_v6=$1 @@ -132,16 +142,24 @@ make_connection() # Capture netlink events over the two network namespaces running # the MPTCP client and server - local client_evts - client_evts=$(mktemp) + if [ -z "$client_evts" ]; then + client_evts=$(mktemp) + fi :>"$client_evts" + if [ $client_evts_pid -ne 0 ]; then + kill_wait $client_evts_pid + fi ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & - local client_evts_pid=$! - local server_evts - server_evts=$(mktemp) + client_evts_pid=$! + if [ -z "$server_evts" ]; then + server_evts=$(mktemp) + fi :>"$server_evts" + if [ $server_evts_pid -ne 0 ]; then + kill_wait $server_evts_pid + fi ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & - local server_evts_pid=$! + server_evts_pid=$! sleep 0.5 # Run the server @@ -159,7 +177,6 @@ make_connection() sleep 1 # Capture client/server attributes from MPTCP connection netlink events - kill_wait $client_evts_pid local client_token local client_port @@ -171,11 +188,10 @@ make_connection() client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") - kill_wait $server_evts_pid - server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") - server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ - "$server_evts") - rm -f "$client_evts" "$server_evts" "$file" + server_token=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + server_serverside=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && [ "$server_serverside" = 1 ] @@ -239,13 +255,8 @@ verify_announce_event() test_announce() { - local evts - evts=$(mktemp) # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # ADD_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1)) @@ -253,7 +264,7 @@ test_announce() $client_addr_id dev ns2eth1 > /dev/null 2>&1 local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token \t\t" if [ "$type" = "" ] then @@ -264,71 +275,63 @@ test_announce() fi # ADD_ADDR from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2" "$client_addr_id"\ + verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" # ADD_ADDR6 from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" # ADD_ADDR from the client to server machine using a new port - :>"$evts" + :>"$server_evts" client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" - kill_wait $evts_pid - # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$client_evts" # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" # ADD_ADDR6 from the server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" # ADD_ADDR from the server to client machine using a new port - :>"$evts" + :>"$client_evts" server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" - - kill_wait $evts_pid - rm -f "$evts" } verify_remove_event() @@ -356,14 +359,8 @@ verify_remove_event() test_remove() { - local evts - evts=$(mktemp) - # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # RM_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1 )) @@ -372,7 +369,7 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token \t"\ $client_addr_id local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -386,7 +383,7 @@ test_remove() $invalid_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id \t"\ $invalid_id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -395,40 +392,35 @@ test_remove() fi # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR6 from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id" - - kill_wait $evts_pid + verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$client_evts" # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ @@ -436,27 +428,24 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t"\ $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR from the server to client machine - :>"$evts" + :>"$client_evts" server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR6 from the server to client machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id" - - kill_wait $evts_pid - rm -f "$evts" + verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } verify_subflow_events() @@ -532,13 +521,8 @@ verify_subflow_events() test_subflows() { - local evts - evts=$(mktemp) # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # Attempt to add a listener at 10.0.2.2:<subflow-port> ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ @@ -551,25 +535,25 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ rport "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid local sport - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine @@ -583,31 +567,31 @@ test_subflows() listener_pid=$! # ADD_ADDR6 from client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\ $client_addr_id > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW6 from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW6 from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ "$client_addr_id" "ns1" "ns2" @@ -622,44 +606,39 @@ test_subflows() listener_pid=$! # ADD_ADDR from client to server machine using a new port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id port $new4_port > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\ $new4_port token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ "10.0.2.1" "10.0.2.2" "$new4_port" "23"\ "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ $new4_port token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ "$client4_token" > /dev/null 2>&1 - kill_wait $evts_pid - # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$client_evts" # Attempt to add a listener at 10.0.2.1:<subflow-port> ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ @@ -672,24 +651,24 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine @@ -703,17 +682,17 @@ test_subflows() listener_pid=$! # ADD_ADDR6 from server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client6_token"\ + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ "$AF_INET6" "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23"\ "$server_addr_id" "ns2" "ns1" @@ -721,14 +700,14 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client6_token" "$AF_INET6" "dead:beef:2::2"\ + verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR6 from server to client machine @@ -742,38 +721,35 @@ test_subflows() listener_pid=$! # ADD_ADDR from server to client machine using a new port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id port $new4_port > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ $new4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ $new4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ "$server4_token" > /dev/null 2>&1 - - kill_wait $evts_pid - rm -f "$evts" } test_prio() @@ -807,11 +783,85 @@ test_prio() fi } +verify_listener_events() +{ + local evt=$1 + local e_type=$2 + local e_family=$3 + local e_saddr=$4 + local e_sport=$5 + local type + local family + local saddr + local sport + + if [ $e_type = $LISTENER_CREATED ]; then + stdbuf -o0 -e0 printf "CREATE_LISTENER %s:%s\t\t\t\t\t"\ + $e_saddr $e_sport + elif [ $e_type = $LISTENER_CLOSED ]; then + stdbuf -o0 -e0 printf "CLOSE_LISTENER %s:%s\t\t\t\t\t"\ + $e_saddr $e_sport + fi + + type=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + if [ $family ] && [ $family = $AF_INET6 ]; then + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + else + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + fi + + if [ $type ] && [ $type = $e_type ] && + [ $family ] && [ $family = $e_family ] && + [ $saddr ] && [ $saddr = $e_saddr ] && + [ $sport ] && [ $sport = $e_sport ]; then + stdbuf -o0 -e0 printf "[OK]\n" + return 0 + fi + stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 +} + +test_listener() +{ + # Capture events on the network namespace running the client + :>$client_evts + + # Attempt to add a listener at 10.0.2.2:<subflow-port> + ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\ + $client4_port > /dev/null 2>&1 & + local listener_pid=$! + + verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port + + # ADD_ADDR from client to server machine reusing the subflow port + ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\ + $client_addr_id > /dev/null 2>&1 + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ + rport $client4_port token $server4_token > /dev/null 2>&1 + sleep 0.5 + + # Delete the listener from the client ns, if one was created + kill_wait $listener_pid + + verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port +} + make_connection make_connection "v6" test_announce test_remove test_subflows test_prio +test_listener exit 0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 736e358dc549..dfe3d287f01d 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -686,10 +686,12 @@ setup_xfrm() { } setup_nettest_xfrm() { - which nettest >/dev/null - if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - return 1 + if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + return 1 + fi fi [ ${1} -eq 6 ] && proto="-6" || proto="" diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 0900c5438fbb..275491be3da2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -782,7 +782,7 @@ kci_test_ipsec_offload() tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 check_err $? - ip x p add dir out src $dstip/24 dst $srcip/24 \ + ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 check_err $? diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c new file mode 100644 index 000000000000..f02f1f95d227 --- /dev/null +++ b/tools/testing/selftests/net/sctp_hello.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len) +{ + if (ss->ss_family == AF_INET) { + struct sockaddr_in *a = (struct sockaddr_in *)ss; + + a->sin_addr.s_addr = inet_addr(ip); + a->sin_port = htons(atoi(port)); + *len = sizeof(*a); + } else { + struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss; + + a->sin6_family = AF_INET6; + inet_pton(AF_INET6, ip, &a->sin6_addr); + a->sin6_port = htons(atoi(port)); + *len = sizeof(*a); + } +} + +static int do_client(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + char buf[] = "hello"; + int csk, ret, len; + + if (argc < 5) { + printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]); + return -1; + } + + bzero((void *)&ss, sizeof(ss)); + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (csk < 0) { + printf("failed to create socket\n"); + return -1; + } + + if (argc >= 7) { + set_addr(&ss, argv[5], argv[6], &len); + ret = bind(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = connect(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to connect to peer\n"); + return -1; + } + + ret = send(csk, buf, strlen(buf) + 1, 0); + if (ret < 0) { + printf("failed to send msg %d\n", ret); + return -1; + } + close(csk); + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + int lsk, csk, ret, len; + char buf[20]; + + if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s server|client ...\n", argv[0]); + return -1; + } + + if (!strcmp(argv[1], "client")) + return do_client(argc, argv); + + if (argc < 5) { + printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]); + return -1; + } + + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (lsk < 0) { + printf("failed to create lsk\n"); + return -1; + } + + if (argc >= 6) { + ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE, + argv[5], strlen(argv[5]) + 1); + if (ret < 0) { + printf("failed to bind to device\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = bind(lsk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + + ret = listen(lsk, 5); + if (ret < 0) { + printf("failed to listen on port\n"); + return -1; + } + + csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL); + if (csk < 0) { + printf("failed to accept new client\n"); + return -1; + } + + ret = recv(csk, buf, sizeof(buf), 0); + if (ret <= 0) { + printf("failed to recv msg %d\n", ret); + return -1; + } + close(csk); + close(lsk); + + return 0; +} diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh new file mode 100755 index 000000000000..c721e952e5f3 --- /dev/null +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP VRF. +# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1 +# SERVER_NS +# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 + +CLIENT_NS1="client-ns1" +CLIENT_NS2="client-ns2" +CLIENT_IP4="10.0.0.1" +CLIENT_IP6="2000::1" +CLIENT_PORT=1234 + +SERVER_NS="server-ns" +SERVER_IP4="10.0.0.2" +SERVER_IP6="2000::2" +SERVER_PORT=1234 + +setup() { + modprobe sctp + modprobe sctp_diag + ip netns add $CLIENT_NS1 + ip netns add $CLIENT_NS2 + ip netns add $SERVER_NS + + ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + + ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 + ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 + + ip -n $CLIENT_NS1 link set veth1 up + ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $CLIENT_NS2 link set veth1 up + ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $SERVER_NS link add dummy1 type dummy + ip -n $SERVER_NS link set dummy1 up + ip -n $SERVER_NS link add vrf-1 type vrf table 10 + ip -n $SERVER_NS link add vrf-2 type vrf table 20 + ip -n $SERVER_NS link set vrf-1 up + ip -n $SERVER_NS link set vrf-2 up + ip -n $SERVER_NS link set veth1 master vrf-1 + ip -n $SERVER_NS link set veth2 master vrf-2 + + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2 + + ip -n $SERVER_NS link set veth1 up + ip -n $SERVER_NS link set veth2 up + ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4 + ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6 + ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6 + ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src $SERVER_IP6 +} + +cleanup() { + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns del "$CLIENT_NS1" + ip netns del "$CLIENT_NS2" + ip netns del "$SERVER_NS" +} + +wait_server() { + local IFACE=$1 + local CNT=0 + + until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ + grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do + [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +do_test() { + local CLIENT_NS=$1 + local IFACE=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE 2>&1 >/dev/null & + disown + wait_server $IFACE || return $RET + timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +do_testx() { + local IFACE1=$1 + local IFACE2=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE1 2>&1 >/dev/null & + disown + wait_server $IFACE1 || return $RET + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE2 2>&1 >/dev/null & + disown + wait_server $IFACE2 || return $RET + timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +testup() { + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " + do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " + do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 05: bind veth2 in server, connect from client 1, N " + do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 06: bind veth1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N " + do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y " + do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N " + do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y " + do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" +} + +trap cleanup EXIT +setup && echo "Testing For SCTP VRF:" && \ +CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" && +CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***" +exit $? diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c new file mode 100644 index 000000000000..0e04f9fef986 --- /dev/null +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#define _GNU_SOURCE +#include <sched.h> + +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/sysinfo.h> + +#include "../kselftest_harness.h" + +#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */ +#define NR_SERVER self->nproc +#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER) + +FIXTURE(so_incoming_cpu) +{ + int nproc; + int *servers; + union { + struct sockaddr addr; + struct sockaddr_in in_addr; + }; + socklen_t addrlen; +}; + +enum when_to_set { + BEFORE_REUSEPORT, + BEFORE_LISTEN, + AFTER_LISTEN, + AFTER_ALL_LISTEN, +}; + +FIXTURE_VARIANT(so_incoming_cpu) +{ + int when_to_set; +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_reuseport) +{ + .when_to_set = BEFORE_REUSEPORT, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_listen) +{ + .when_to_set = BEFORE_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_listen) +{ + .when_to_set = AFTER_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen) +{ + .when_to_set = AFTER_ALL_LISTEN, +}; + +FIXTURE_SETUP(so_incoming_cpu) +{ + self->nproc = get_nprocs(); + ASSERT_LE(2, self->nproc); + + self->servers = malloc(sizeof(int) * NR_SERVER); + ASSERT_NE(self->servers, NULL); + + self->in_addr.sin_family = AF_INET; + self->in_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + self->in_addr.sin_port = htons(0); + self->addrlen = sizeof(struct sockaddr_in); +} + +FIXTURE_TEARDOWN(so_incoming_cpu) +{ + int i; + + for (i = 0; i < NR_SERVER; i++) + close(self->servers[i]); + + free(self->servers); +} + +void set_so_incoming_cpu(struct __test_metadata *_metadata, int fd, int cpu) +{ + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(int)); + ASSERT_EQ(ret, 0); +} + +int create_server(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant, + int cpu) +{ + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); + ASSERT_NE(fd, -1); + + if (variant->when_to_set == BEFORE_REUSEPORT) + set_so_incoming_cpu(_metadata, fd, cpu); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + + ret = bind(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == BEFORE_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + /* We don't use CLIENT_PER_SERVER here not to block + * this test at connect() if SO_INCOMING_CPU is broken. + */ + ret = listen(fd, NR_CLIENT); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == AFTER_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + return fd; +} + +void create_servers(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant) +{ + int i, ret; + + for (i = 0; i < NR_SERVER; i++) { + self->servers[i] = create_server(_metadata, self, variant, i); + + if (i == 0) { + ret = getsockname(self->servers[i], &self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + } + } + + if (variant->when_to_set == AFTER_ALL_LISTEN) { + for (i = 0; i < NR_SERVER; i++) + set_so_incoming_cpu(_metadata, self->servers[i], i); + } +} + +void create_clients(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + cpu_set_t cpu_set; + int i, j, fd, ret; + + for (i = 0; i < NR_SERVER; i++) { + CPU_ZERO(&cpu_set); + + CPU_SET(i, &cpu_set); + ASSERT_EQ(CPU_COUNT(&cpu_set), 1); + ASSERT_NE(CPU_ISSET(i, &cpu_set), 0); + + /* Make sure SYN will be processed on the i-th CPU + * and finally distributed to the i-th listener. + */ + sched_setaffinity(0, sizeof(cpu_set), &cpu_set); + ASSERT_EQ(ret, 0); + + for (j = 0; j < CLIENT_PER_SERVER; j++) { + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(fd, -1); + + ret = connect(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + close(fd); + } + } +} + +void verify_incoming_cpu(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + int i, j, fd, cpu, ret, total = 0; + socklen_t len = sizeof(int); + + for (i = 0; i < NR_SERVER; i++) { + for (j = 0; j < CLIENT_PER_SERVER; j++) { + /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */ + fd = accept(self->servers[i], &self->addr, &self->addrlen); + ASSERT_NE(fd, -1); + + ret = getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len); + ASSERT_EQ(ret, 0); + ASSERT_EQ(cpu, i); + + close(fd); + total++; + } + } + + ASSERT_EQ(total, NR_CLIENT); + TH_LOG("SO_INCOMING_CPU is very likely to be " + "working correctly with %d sockets.", total); +} + +TEST_F(so_incoming_cpu, test1) +{ + create_servers(_metadata, self, variant); + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test2) +{ + int server; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + close(server); + + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test3) +{ + int server, client; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + + create_clients(_metadata, self); + + /* Never receive any requests */ + client = accept(server, &self->addr, &self->addrlen); + ASSERT_EQ(client, -1); + + verify_incoming_cpu(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index 0a49907cd4fe..da5bfd834eff 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -32,7 +32,7 @@ DEV="eth0" # This is determined by reading the RSS indirection table using ethtool. get_rss_cfg_num_rxqs() { echo $(ethtool -x "${DEV}" | - egrep [[:space:]]+[0-9]+:[[:space:]]+ | + grep -E [[:space:]]+[0-9]+:[[:space:]]+ | cut -d: -f2- | awk '{$1=$1};1' | tr ' ' '\n' | diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 6a443ca3cd3a..0c743752669a 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + # set global exit status, but never reset nonzero one. check_err() { @@ -34,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp } run_one() { @@ -195,8 +197,8 @@ run_all() { return $ret } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 8a1109a545db..894972877e8b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -34,7 +36,7 @@ run_one() { ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & @@ -80,8 +82,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 7fe85ba51075..c9c4b9d65839 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -36,7 +38,7 @@ run_one() { ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action @@ -81,8 +83,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 1bcd82e1f662..c079565add39 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 @@ -46,7 +47,7 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null + ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null } create_vxlan_endpoint() { diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 430895d1a2b6..2d073595c620 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -216,8 +217,8 @@ while getopts "hs:" option; do esac done -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit 1 fi @@ -288,14 +289,14 @@ if [ $CPUS -gt 1 ]; then ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null printf "%-60s" "bad setting: XDP with RX nr less than TX" - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null &&\ echo "fail - set operation successful ?!?" || echo " ok " # the following tests will run with multiple channels active ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 ip netns exec $NS_DST ethtool -L veth$DST rx 2 - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ @@ -311,7 +312,7 @@ if [ $CPUS -gt 2 ]; then chk_channels "setting invalid channels nr" $DST 2 2 fi -ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC off diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh index b48e1833bc89..76645aaf2b58 100755 --- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -35,6 +35,8 @@ cleanup() { for i in 1 2;do ip netns del nsrouter$i;done } +trap cleanup EXIT + ipv4() { echo -n 192.168.$1.2 } @@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF table inet filter { counter unknown { } counter related { } + counter redir4 { } + counter redir6 { } chain input { type filter hook input priority 0; policy accept; - meta l4proto { icmp, icmpv6 } ct state established,untracked accept + icmp type "redirect" ct state "related" counter name "redir4" accept + icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept + + meta l4proto { icmp, icmpv6 } ct state established,untracked accept meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept + counter name "unknown" drop } } @@ -279,5 +287,29 @@ else echo "ERROR: icmp error RELATED state test has failed" fi -cleanup +# add 'bad' route, expect icmp REDIRECT to be generated +ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1 +ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1 + +ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null + +expect="packets 1 bytes 112" +check_counter nsclient1 "redir4" "$expect" +if [ $? -ne 0 ];then + ret=1 +fi + +ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null +expect="packets 1 bytes 192" +check_counter nsclient1 "redir6" "$expect" +if [ $? -ne 0 ];then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: icmp redirects had RELATED state" +else + echo "ERROR: icmp redirect RELATED state test has failed" +fi + exit $ret diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 69ea659caca9..22f1e1d73fa8 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -95,6 +95,7 @@ all: run sysroot: sysroot/$(ARCH)/include sysroot/$(ARCH)/include: + $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot $(QUIET_MKDIR)mkdir -p sysroot $(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone $(Q)mv sysroot/sysroot sysroot/$(ARCH) @@ -133,3 +134,5 @@ clean: $(Q)rm -rf initramfs $(call QUIET_CLEAN, run.out) $(Q)rm -rf run.out + +.PHONY: sysroot/$(ARCH)/include diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 78bced95ac63..f14f5076fb6d 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -565,6 +565,13 @@ int run_stdlib(int min, int max) CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break; CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break; CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break; + CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break; + CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break; + CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break; + CASE_TEST(memcmp_20_e0); EXPECT_LT(1, memcmp("aaa\x20", "aaa\xe0", 4), 0); break; + CASE_TEST(memcmp_e0_20); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x20", 4), 0); break; + CASE_TEST(memcmp_80_e0); EXPECT_LT(1, memcmp("aaa\x80", "aaa\xe0", 4), 0); break; + CASE_TEST(memcmp_e0_80); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x80", 4), 0); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index e7ceabed7f51..7d0aa22bdc12 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -17,6 +17,7 @@ // while shifting across CPUs. #undef NDEBUG #include <assert.h> +#include <errno.h> #include <unistd.h> #include <sys/syscall.h> #include <stdlib.h> @@ -54,7 +55,7 @@ int main(void) len += sizeof(unsigned long); free(m); m = malloc(len); - } while (sys_sched_getaffinity(0, len, m) == -EINVAL); + } while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL); fd = open("/proc/uptime", O_RDONLY); assert(fd >= 0); diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh index d5a16631b16e..0cf55f1bf654 100755 --- a/tools/testing/selftests/rcutorture/bin/config2csv.sh +++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh @@ -30,9 +30,8 @@ else fi scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`" -T=/tmp/config2latex.sh.$$ +T=`mktemp -d /tmp/config2latex.sh.XXXXXX` trap 'rm -rf $T' 0 -mkdir $T cat << '---EOF---' >> $T/p.awk END { diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh index 90016c359e83..b3d2e7efa40c 100755 --- a/tools/testing/selftests/rcutorture/bin/config_override.sh +++ b/tools/testing/selftests/rcutorture/bin/config_override.sh @@ -29,9 +29,8 @@ else exit 1 fi -T=${TMPDIR-/tmp}/config_override.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/config_override.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' | awk ' diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index 31584cee84d7..83fac1852ab2 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -7,9 +7,8 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> -T=${TMPDIR-/tmp}/abat-chk-config.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cat $1 > $T/.config diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index d6e5ce084b1c..28bdb3ac7ba6 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -15,9 +15,8 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> -T=${TMPDIR-/tmp}/configinit.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/configinit.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T # Capture config spec file. diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index 0941f1ddab65..8a968fbda02c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -12,9 +12,8 @@ scriptname=$0 args="$*" -T=${TMPDIR-/tmp}/kvm-again.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-again.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T if ! test -d tools/testing/selftests/rcutorture/bin then @@ -51,27 +50,56 @@ RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh +bootargs= dryrun= dur= default_link="cp -R" -rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`" +resdir="`pwd`/tools/testing/selftests/rcutorture/res" +rundir="$resdir/`date +%Y.%m.%d-%H.%M.%S-again`" +got_datestamp= +got_rundir= startdate="`date`" starttime="`get_starttime`" usage () { echo "Usage: $scriptname $oldrun [ arguments ]:" + echo " --bootargs kernel-boot-arguments" + echo " --datestamp string" echo " --dryrun" echo " --duration minutes | <seconds>s | <hours>h | <days>d" echo " --link hard|soft|copy" echo " --remote" echo " --rundir /new/res/path" + echo "Command line: $scriptname $args" exit 1 } while test $# -gt 0 do case "$1" in + --bootargs|--bootarg) + checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--' + bootargs="$bootargs $2" + shift + ;; + --datestamp) + checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--' + if test -n "$got_rundir" || test -n "$got_datestamp" + then + echo Only one of --datestamp or --rundir may be specified + usage + fi + got_datestamp=y + ds=$2 + rundir="$resdir/$ds" + if test -e "$rundir" + then + echo "--datestamp $2: Already exists." + usage + fi + shift + ;; --dryrun) dryrun=1 ;; @@ -113,6 +141,12 @@ do ;; --rundir) checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error' + if test -n "$got_rundir" || test -n "$got_datestamp" + then + echo Only one of --datestamp or --rundir may be specified + usage + fi + got_rundir=y rundir=$2 if test -e "$rundir" then @@ -122,8 +156,11 @@ do shift ;; *) - echo Unknown argument $1 - usage + if test -n "$1" + then + echo Unknown argument $1 + usage + fi ;; esac shift @@ -156,7 +193,7 @@ do qemu_cmd_dir="`dirname "$i"`" kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`" jitter_dir="`dirname "$kernel_dir"`" - kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i + kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur "$bootargs" < $T/qemu-cmd > $i if test -n "$arg_remote" then echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh index f99b2c146f83..46b08cd16ba5 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh @@ -7,9 +7,8 @@ # # Usage: kvm-assign-cpus.sh /path/to/sysfs -T=/tmp/kvm-assign-cpus.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-assign-cpus.sh.XXXXXX`" trap 'rm -rf $T' 0 2 -mkdir $T sysfsdir=${1-/sys/devices/system/node} if ! cd "$sysfsdir" > $T/msg 2>&1 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 5ad973dca820..e28a82851f7c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -23,9 +23,8 @@ then fi resdir=${2} -T=${TMPDIR-/tmp}/test-linux.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-build.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cp ${config_template} $T/config cat << ___EOF___ >> $T/config diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh index ee886b40a5d2..2b56baceb05d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh @@ -18,9 +18,8 @@ then exit 1 fi -T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-end-run-stats.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE PATH=${RCUTORTURE}/bin:$PATH; export PATH diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 0789c5606d2a..1df7e695edf7 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -30,7 +30,7 @@ do resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi - TORTURE_SUITE="`cat $i/../torture_suite`" + TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE configfile=`echo $i | sed -e 's,^.*/,,'` rm -f $i/console.log.*.diags case "${TORTURE_SUITE}" in diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 9f0a5d5ff2dd..a2328163eba1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -34,19 +34,18 @@ fi shift # Pathnames: -# T: /tmp/kvm-remote.sh.$$ -# resdir: /tmp/kvm-remote.sh.$$/res -# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix) +# T: /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp +# resdir: /tmp/kvm-remote.sh.NNNNNN/res +# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix) # oldrun: `pwd`/tools/testing/.../res/$otherds # # Pathname segments: -# TD: kvm-remote.sh.$$ +# TD: kvm-remote.sh.NNNNNN # ds: yyyy.mm.dd-hh.mm.ss-remote -TD=kvm-remote.sh.$$ -T=${TMPDIR-/tmp}/$TD +T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T +TD="`basename "$T"`" resdir="$T/res" ds=`date +%Y.%m.%d-%H.%M.%S`-remote diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index 1e29d656501b..c3808c490d92 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -13,9 +13,8 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T echo ---- Running batch $* # Check arguments diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index 44280582c594..76f24cd5825b 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -17,9 +17,8 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T resdir="$1" if ! test -d "$resdir" @@ -109,7 +108,7 @@ do if test $kruntime -lt $seconds then echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 - grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 + grep "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1 killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" if test -n "$killpid" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index f4c8055dbf7a..d2a3710a5f2a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -25,9 +25,8 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> -T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T . functions.sh . $CONFIGFRAG/ver_functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh index d40b4e60a50c..75a2610a27f3 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh @@ -3,10 +3,14 @@ # # Transform a qemu-cmd file to allow reuse. # -# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out +# Usage: kvm-transform.sh bzImage console.log jitter_dir seconds [ bootargs ] < qemu-cmd-in > qemu-cmd-out # # bzImage: Kernel and initrd from the same prior kvm.sh run. # console.log: File into which to place console output. +# jitter_dir: Jitter directory for TORTURE_JITTER_START and +# TORTURE_JITTER_STOP environment variables. +# seconds: Run duaration for *.shutdown_secs module parameter. +# bootargs: New kernel boot parameters. Beware of Robert Tables. # # The original qemu-cmd file is provided on standard input. # The transformed qemu-cmd file is on standard output. @@ -17,6 +21,9 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> +T=`mktemp -d /tmp/kvm-transform.sh.XXXXXXXXXX` +trap 'rm -rf $T' 0 2 + image="$1" if test -z "$image" then @@ -41,9 +48,17 @@ then echo "Invalid duration, should be numeric in seconds: '$seconds'" exit 1 fi +bootargs="$5" + +# Build awk program. +echo "BEGIN {" > $T/bootarg.awk +echo $bootargs | tr -s ' ' '\012' | + awk -v dq='"' '/./ { print "\tbootarg[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk +echo $bootargs | tr -s ' ' '\012' | sed -e 's/=.*$//' | + awk -v dq='"' '/./ { print "\tbootpar[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk +cat >> $T/bootarg.awk << '___EOF___' +} -awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ - -v seconds="$seconds" ' /^# seconds=/ { if (seconds == "") print $0; @@ -70,13 +85,7 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ { line = ""; for (i = 1; i <= NF; i++) { - if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) { - sub(/[0-9]*$/, seconds, $i); - if (line == "") - line = $i; - else - line = line " " $i; - } else if (line == "") { + if (line == "") { line = $i; } else { line = line " " $i; @@ -87,7 +96,44 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ } else if ($i == "-kernel") { i++; line = line " " image; + } else if ($i == "-append") { + for (i++; i <= NF; i++) { + arg = $i; + lq = ""; + rq = ""; + if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) + sub(/[0-9]*$/, seconds, arg); + if (arg ~ /^"/) { + lq = substr(arg, 1, 1); + arg = substr(arg, 2); + } + if (arg ~ /"$/) { + rq = substr(arg, length($i), 1); + arg = substr(arg, 1, length($i) - 1); + } + par = arg; + gsub(/=.*$/, "", par); + j = 1; + while (bootpar[j] != "") { + if (bootpar[j] == par) { + arg = ""; + break; + } + j++; + } + if (line == "") + line = lq arg; + else + line = line " " lq arg; + } + for (j in bootarg) + line = line " " bootarg[j]; + line = line rq; } } print line; -}' +} +___EOF___ + +awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ + -v seconds="$seconds" -f $T/bootarg.awk diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6c734818a875..7710b1e1cdda 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -14,9 +14,8 @@ scriptname=$0 args="$*" -T=${TMPDIR-/tmp}/kvm.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cd `dirname $scriptname`/../../../../../ diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 2dbfca3589b1..5a0b7ffcf047 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -15,9 +15,8 @@ F=$1 title=$2 -T=${TMPDIR-/tmp}/parse-build.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/parse-build.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T . functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index d477618e7261..130d0de4c3bb 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -206,9 +206,8 @@ ds="`date +%Y.%m.%d-%H.%M.%S`-torture" startdate="`date`" starttime="`get_starttime`" -T=/tmp/torture.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/torture.sh.XXXXXX`" trap 'rm -rf $T' 0 2 -mkdir $T echo " --- " $scriptname $args | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log @@ -278,6 +277,8 @@ function torture_one { then cat $T/$curflavor.out | tee -a $T/log echo retcode=$retcode | tee -a $T/log + else + echo $resdir > $T/last-resdir fi if test "$retcode" == 0 then @@ -303,10 +304,12 @@ function torture_set { shift curflavor=$flavor torture_one "$@" + mv $T/last-resdir $T/last-resdir-nodebug || : if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan torture_one "$@" --kasan + mv $T/last-resdir $T/last-resdir-kasan || : fi if test "$do_kcsan" = "yes" then @@ -317,6 +320,7 @@ function torture_set { cur_kcsan_kmake_args="$kcsan_kmake_args" fi torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + mv $T/last-resdir $T/last-resdir-kcsan || : fi } @@ -326,20 +330,34 @@ then echo " --- allmodconfig:" Start `date` | tee -a $T/log amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" mkdir -p "$amcdir" - echo " --- make clean" > "$amcdir/Make.out" 2>&1 + echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 - echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1 - cp .config $amcdir - make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 - echo " --- make " >> "$amcdir/Make.out" 2>&1 - make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 - retcode="$?" - echo $retcode > "$amcdir/Make.exitcode" - if test "$retcode" == 0 + retcode=$? + buildphase='"make clean"' + if test "$retcode" -eq 0 + then + echo " --- make allmodconfig" | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1 + cp .config $amcdir + make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 + retcode=$? + buildphase='"make allmodconfig"' + fi + if test "$retcode" -eq 0 + then + echo " --- make " | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1 + make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 + retcode="$?" + echo $retcode > "$amcdir/Make.exitcode" + buildphase='"make"' + fi + if test "$retcode" -eq 0 then echo "allmodconfig($retcode)" $amcdir >> $T/successes + echo Success >> $amcdir/log else echo "allmodconfig($retcode)" $amcdir >> $T/failures + echo " --- allmodconfig Test summary:" >> $amcdir/log + echo " --- Summary: Exit code $retcode from $buildphase, see Make.out" >> $amcdir/log fi fi @@ -379,11 +397,48 @@ then else primlist= fi +firsttime=1 +do_kasan_save="$do_kasan" +do_kcsan_save="$do_kcsan" for prim in $primlist do - torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + if test -n "$firsttime" + then + torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : + if test -f "$T/last-resdir-kasan" + then + mv $T/last-resdir-kasan $T/first-resdir-kasan || : + fi + if test -f "$T/last-resdir-kcsan" + then + mv $T/last-resdir-kcsan $T/first-resdir-kcsan || : + fi + firsttime= + do_kasan= + do_kcsan= + else + torture_bootargs= + for i in $T/first-resdir-* + do + case "$i" in + *-nodebug) + torture_suffix= + ;; + *-kasan) + torture_suffix="-kasan" + ;; + *-kcsan) + torture_suffix="-kcsan" + ;; + esac + torture_set "refscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "refscale.scale_type=$prim" + done + fi done +do_kasan="$do_kasan_save" +do_kcsan="$do_kcsan_save" if test "$do_rcuscale" = yes then @@ -391,11 +446,48 @@ then else primlist= fi +firsttime=1 +do_kasan_save="$do_kasan" +do_kcsan_save="$do_kcsan" for prim in $primlist do - torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + if test -n "$firsttime" + then + torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : + if test -f "$T/last-resdir-kasan" + then + mv $T/last-resdir-kasan $T/first-resdir-kasan || : + fi + if test -f "$T/last-resdir-kcsan" + then + mv $T/last-resdir-kcsan $T/first-resdir-kcsan || : + fi + firsttime= + do_kasan= + do_kcsan= + else + torture_bootargs= + for i in $T/first-resdir-* + do + case "$i" in + *-nodebug) + torture_suffix= + ;; + *-kasan) + torture_suffix="-kasan" + ;; + *-kcsan) + torture_suffix="-kcsan" + ;; + esac + torture_set "rcuscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "rcuscale.scale_type=$prim" + done + fi done +do_kasan="$do_kasan_save" +do_kcsan="$do_kcsan_save" if test "$do_kvfree" = "yes" then @@ -458,7 +550,10 @@ if test -n "$tdir" && test $compress_concurrency -gt 0 then # KASAN vmlinux files can approach 1GB in size, so compress them. echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1 - find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo + find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo-all + find "$tdir" -type f -name 're-run' -print | sed -e 's,/re-run,,' | + grep -e '-k[ac]san$' > $T/xz-todo-copy + sort $T/xz-todo-all $T/xz-todo-copy | uniq -u > $T/xz-todo ncompresses=0 batchno=1 if test -s $T/xz-todo @@ -490,6 +585,24 @@ then echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log fi wait + if test -s $T/xz-todo-copy + then + # The trick here is that we need corresponding + # vmlinux files from corresponding scenarios. + echo Linking vmlinux.xz files to re-use scenarios `date` | tee -a "$tdir/log-xz" | tee -a $T/log + dirstash="`pwd`" + for i in `cat $T/xz-todo-copy` + do + cd $i + find . -name vmlinux -print > $T/xz-todo-copy-vmlinux + for v in `cat $T/xz-todo-copy-vmlinux` + do + rm -f "$v" + cp -l `cat $i/re-run`/"$i/$v".xz "`dirname "$v"`" + done + cd "$dirstash" + done + fi echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log else diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index 2b9d929a24ed..63ce02d1d5cc 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -31,7 +31,6 @@ FIXTURE(rtc) { FIXTURE_SETUP(rtc) { self->fd = open(rtc_file, O_RDONLY); - ASSERT_NE(-1, self->fd); } FIXTURE_TEARDOWN(rtc) { @@ -42,6 +41,10 @@ TEST_F(rtc, date_read) { int rc; struct rtc_time rtc_tm; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + /* Read the RTC time/date */ rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm); ASSERT_NE(-1, rc); @@ -85,6 +88,10 @@ TEST_F_TIMEOUT(rtc, date_read_loop, READ_LOOP_DURATION_SEC + 2) { struct rtc_time rtc_tm; time_t start_rtc_read, prev_rtc_read; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + TH_LOG("Continuously reading RTC time for %ds (with %dms breaks after every read).", READ_LOOP_DURATION_SEC, READ_LOOP_SLEEP_MS); @@ -119,6 +126,10 @@ TEST_F_TIMEOUT(rtc, uie_read, NUM_UIE + 2) { int i, rc, irq = 0; unsigned long data; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + /* Turn on update interrupts */ rc = ioctl(self->fd, RTC_UIE_ON, 0); if (rc == -1) { @@ -144,6 +155,10 @@ TEST_F(rtc, uie_select) { int i, rc, irq = 0; unsigned long data; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + /* Turn on update interrupts */ rc = ioctl(self->fd, RTC_UIE_ON, 0); if (rc == -1) { @@ -183,6 +198,10 @@ TEST_F(rtc, alarm_alm_set) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -237,6 +256,10 @@ TEST_F(rtc, alarm_wkalm_set) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -285,6 +308,10 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -339,6 +366,10 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4ae6c8991307..9c2f448bb3a9 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -392,6 +392,8 @@ TEST(mode_filter_without_nnp) .filter = filter, }; long ret; + cap_t cap = cap_get_proc(); + cap_flag_value_t is_cap_sys_admin = 0; ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); ASSERT_LE(0, ret) { @@ -400,8 +402,8 @@ TEST(mode_filter_without_nnp) errno = 0; ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); /* Succeeds with CAP_SYS_ADMIN, fails without */ - /* TODO(wad) check caps not euid */ - if (geteuid()) { + cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin); + if (!is_cap_sys_admin) { EXPECT_EQ(-1, ret); EXPECT_EQ(EACCES, errno); } else { diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh index 22b6c8910b18..4710e09f49fa 100755 --- a/tools/testing/selftests/splice/short_splice_read.sh +++ b/tools/testing/selftests/splice/short_splice_read.sh @@ -127,7 +127,7 @@ expect_success "proc_handler: special read splice" test_splice /proc/sys/kernel/ if ! [ -d /sys/module/test_module/sections ] ; then expect_success "test_module kernel module load" modprobe test_module fi -expect_failure "kernfs attr splice" test_splice /sys/module/test_module/coresize -expect_failure "kernfs binattr splice" test_splice /sys/module/test_module/sections/.init.text +expect_success "kernfs attr splice" test_splice /sys/module/test_module/coresize +expect_success "kernfs binattr splice" test_splice /sys/module/test_module/sections/.init.text exit $ret diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee22e3447ec7..7bd94f8e490a 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -246,6 +246,110 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None): stage, output, '"{}" did not complete successfully'.format(prefix)) +def verify_by_json(procout, res, tidx, args, pm): + try: + outputJSON = json.loads(procout) + except json.JSONDecodeError: + res.set_result(ResultState.fail) + res.set_failmsg('Cannot decode verify command\'s output. Is it JSON?') + return res + + matchJSON = json.loads(json.dumps(tidx['matchJSON'])) + + if type(outputJSON) != type(matchJSON): + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {} ' + failmsg = failmsg.format(type(outputJSON).__name__, type(matchJSON).__name__) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + if len(matchJSON) > len(outputJSON): + failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}" + failmsg = failmsg.format(len(outputJSON), outputJSON, len(matchJSON), matchJSON) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + res = find_in_json(res, outputJSON, matchJSON, 0) + + return res + +def find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if res.get_result() == ResultState.fail: + return res + + if type(matchJSONVal) == list: + res = find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey) + + elif type(matchJSONVal) == dict: + res = find_in_json_dict(res, outputJSONVal, matchJSONVal) + else: + res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey) + + if res.get_result() != ResultState.fail: + res.set_result(ResultState.success) + return res + + return res + +def find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if (type(matchJSONVal) != type(outputJSONVal)): + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}' + failmsg = failmsg.format(outputJSONVal, matchJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + if len(matchJSONVal) > len(outputJSONVal): + failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}" + failmsg = failmsg.format(len(outputJSONVal), outputJSONVal, len(matchJSONVal), matchJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + for matchJSONIdx, matchJSONVal in enumerate(matchJSONVal): + res = find_in_json(res, outputJSONVal[matchJSONIdx], matchJSONVal, + matchJSONKey) + return res + +def find_in_json_dict(res, outputJSONVal, matchJSONVal): + for matchJSONKey, matchJSONVal in matchJSONVal.items(): + if type(outputJSONVal) == dict: + if matchJSONKey not in outputJSONVal: + failmsg = 'Key not found in json output: {}: {}\nMatching against output: {}' + failmsg = failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + else: + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}' + failmsg = failmsg.format(type(outputJSON).__name__, type(matchJSON).__name__) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return rest + + if type(outputJSONVal) == dict and (type(outputJSONVal[matchJSONKey]) == dict or + type(outputJSONVal[matchJSONKey]) == list): + if len(matchJSONVal) > 0: + res = find_in_json(res, outputJSONVal[matchJSONKey], matchJSONVal, matchJSONKey) + # handling corner case where matchJSONVal == [] or matchJSONVal == {} + else: + res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey) + else: + res = find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey) + return res + +def find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if matchJSONKey in outputJSONVal: + if matchJSONVal != outputJSONVal[matchJSONKey]: + failmsg = 'Value doesn\'t match: {}: {} != {}\nMatching against output: {}' + failmsg = failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal[matchJSONKey], outputJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + return res + def run_one_test(pm, args, index, tidx): global NAMES result = True @@ -292,16 +396,22 @@ def run_one_test(pm, args, index, tidx): else: if args.verbose > 0: print('-----> verify stage') - match_pattern = re.compile( - str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"]) if procout: - match_index = re.findall(match_pattern, procout) - if len(match_index) != int(tidx["matchCount"]): - res.set_result(ResultState.fail) - res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + if 'matchJSON' in tidx: + verify_by_json(procout, res, tidx, args, pm) + elif 'matchPattern' in tidx: + match_pattern = re.compile( + str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) + match_index = re.findall(match_pattern, procout) + if len(match_index) != int(tidx["matchCount"]): + res.set_result(ResultState.fail) + res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + else: + res.set_result(ResultState.success) else: - res.set_result(ResultState.success) + res.set_result(ResultState.fail) + res.set_failmsg('Must specify a match option: matchJSON or matchPattern\n{}'.format(procout)) elif int(tidx["matchCount"]) != 0: res.set_result(ResultState.fail) res.set_failmsg('No output generated by verify command.') @@ -365,6 +475,7 @@ def test_runner(pm, args, filtered_tests): res.set_result(ResultState.skip) res.set_errormsg(errmsg) tsr.add_resultdata(res) + index += 1 continue try: badtest = tidx # in case it goes bad diff --git a/tools/testing/selftests/tdx/Makefile b/tools/testing/selftests/tdx/Makefile new file mode 100644 index 000000000000..8dd43517cd55 --- /dev/null +++ b/tools/testing/selftests/tdx/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -O3 -Wl,-no-as-needed -Wall -static + +TEST_GEN_PROGS := tdx_guest_test + +include ../lib.mk diff --git a/tools/testing/selftests/tdx/config b/tools/testing/selftests/tdx/config new file mode 100644 index 000000000000..aa1edc829ab6 --- /dev/null +++ b/tools/testing/selftests/tdx/config @@ -0,0 +1 @@ +CONFIG_TDX_GUEST_DRIVER=y diff --git a/tools/testing/selftests/tdx/tdx_guest_test.c b/tools/testing/selftests/tdx/tdx_guest_test.c new file mode 100644 index 000000000000..2a2afd856798 --- /dev/null +++ b/tools/testing/selftests/tdx/tdx_guest_test.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test TDX guest features + * + * Copyright (C) 2022 Intel Corporation. + * + * Author: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> + */ + +#include <sys/ioctl.h> + +#include <errno.h> +#include <fcntl.h> + +#include "../kselftest_harness.h" +#include "../../../../include/uapi/linux/tdx-guest.h" + +#define TDX_GUEST_DEVNAME "/dev/tdx_guest" +#define HEX_DUMP_SIZE 8 +#define DEBUG 0 + +/** + * struct tdreport_type - Type header of TDREPORT_STRUCT. + * @type: Type of the TDREPORT (0 - SGX, 81 - TDX, rest are reserved) + * @sub_type: Subtype of the TDREPORT (Default value is 0). + * @version: TDREPORT version (Default value is 0). + * @reserved: Added for future extension. + * + * More details can be found in TDX v1.0 module specification, sec + * titled "REPORTTYPE". + */ +struct tdreport_type { + __u8 type; + __u8 sub_type; + __u8 version; + __u8 reserved; +}; + +/** + * struct reportmac - TDX guest report data, MAC and TEE hashes. + * @type: TDREPORT type header. + * @reserved1: Reserved for future extension. + * @cpu_svn: CPU security version. + * @tee_tcb_info_hash: SHA384 hash of TEE TCB INFO. + * @tee_td_info_hash: SHA384 hash of TDINFO_STRUCT. + * @reportdata: User defined unique data passed in TDG.MR.REPORT request. + * @reserved2: Reserved for future extension. + * @mac: CPU MAC ID. + * + * It is MAC-protected and contains hashes of the remainder of the + * report structure along with user provided report data. More details can + * be found in TDX v1.0 Module specification, sec titled "REPORTMACSTRUCT" + */ +struct reportmac { + struct tdreport_type type; + __u8 reserved1[12]; + __u8 cpu_svn[16]; + __u8 tee_tcb_info_hash[48]; + __u8 tee_td_info_hash[48]; + __u8 reportdata[64]; + __u8 reserved2[32]; + __u8 mac[32]; +}; + +/** + * struct td_info - TDX guest measurements and configuration. + * @attr: TDX Guest attributes (like debug, spet_disable, etc). + * @xfam: Extended features allowed mask. + * @mrtd: Build time measurement register. + * @mrconfigid: Software-defined ID for non-owner-defined configuration + * of the guest - e.g., run-time or OS configuration. + * @mrowner: Software-defined ID for the guest owner. + * @mrownerconfig: Software-defined ID for owner-defined configuration of + * the guest - e.g., specific to the workload. + * @rtmr: Run time measurement registers. + * @reserved: Added for future extension. + * + * It contains the measurements and initial configuration of the TDX guest + * that was locked at initialization and a set of measurement registers + * that are run-time extendable. More details can be found in TDX v1.0 + * Module specification, sec titled "TDINFO_STRUCT". + */ +struct td_info { + __u8 attr[8]; + __u64 xfam; + __u64 mrtd[6]; + __u64 mrconfigid[6]; + __u64 mrowner[6]; + __u64 mrownerconfig[6]; + __u64 rtmr[24]; + __u64 reserved[14]; +}; + +/* + * struct tdreport - Output of TDCALL[TDG.MR.REPORT]. + * @reportmac: Mac protected header of size 256 bytes. + * @tee_tcb_info: Additional attestable elements in the TCB are not + * reflected in the reportmac. + * @reserved: Added for future extension. + * @tdinfo: Measurements and configuration data of size 512 bytes. + * + * More details can be found in TDX v1.0 Module specification, sec + * titled "TDREPORT_STRUCT". + */ +struct tdreport { + struct reportmac reportmac; + __u8 tee_tcb_info[239]; + __u8 reserved[17]; + struct td_info tdinfo; +}; + +static void print_array_hex(const char *title, const char *prefix_str, + const void *buf, int len) +{ + int i, j, line_len, rowsize = HEX_DUMP_SIZE; + const __u8 *ptr = buf; + + printf("\t\t%s", title); + + for (j = 0; j < len; j += rowsize) { + line_len = rowsize < (len - j) ? rowsize : (len - j); + printf("%s%.8x:", prefix_str, j); + for (i = 0; i < line_len; i++) + printf(" %.2x", ptr[j + i]); + printf("\n"); + } + + printf("\n"); +} + +TEST(verify_report) +{ + struct tdx_report_req req; + struct tdreport *tdreport; + int devfd, i; + + devfd = open(TDX_GUEST_DEVNAME, O_RDWR | O_SYNC); + ASSERT_LT(0, devfd); + + /* Generate sample report data */ + for (i = 0; i < TDX_REPORTDATA_LEN; i++) + req.reportdata[i] = i; + + /* Get TDREPORT */ + ASSERT_EQ(0, ioctl(devfd, TDX_CMD_GET_REPORT0, &req)); + + if (DEBUG) { + print_array_hex("\n\t\tTDX report data\n", "", + req.reportdata, sizeof(req.reportdata)); + + print_array_hex("\n\t\tTDX tdreport data\n", "", + req.tdreport, sizeof(req.tdreport)); + } + + /* Make sure TDREPORT data includes the REPORTDATA passed */ + tdreport = (struct tdreport *)req.tdreport; + ASSERT_EQ(0, memcmp(&tdreport->reportmac.reportdata[0], + req.reportdata, sizeof(req.reportdata))); + + ASSERT_EQ(0, close(devfd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index fe1eb8271b35..cae8dca0fbff 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -8,3 +8,4 @@ procfs timens timer timerfd +vfork_exec diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index 3a5936cc10ab..f0d51d4d2c87 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c new file mode 100644 index 000000000000..beb7614941fb --- /dev/null +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <string.h> +#include <pthread.h> + +#include "log.h" +#include "timens.h" + +#define OFFSET (36000) + +struct thread_args { + char *tst_name; + struct timespec *now; +}; + +static void *tcheck(void *_args) +{ + struct thread_args *args = _args; + struct timespec *now = args->now, tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) { + pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", + args->tst_name, tst.tv_sec, now->tv_sec); + return (void *)1UL; + } + } + return NULL; +} + +static int check_in_thread(char *tst_name, struct timespec *now) +{ + struct thread_args args = { + .tst_name = tst_name, + .now = now, + }; + pthread_t th; + void *retval; + + if (pthread_create(&th, NULL, tcheck, &args)) + return pr_perror("thread"); + if (pthread_join(th, &retval)) + return pr_perror("pthread_join"); + return !(retval == NULL); +} + +static int check(char *tst_name, struct timespec *now) +{ + struct timespec tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) + return pr_fail("%s: unexpected value: %ld (%ld)\n", + tst_name, tst.tv_sec, now->tv_sec); + } + if (check_in_thread(tst_name, now)) + return 1; + ksft_test_result_pass("%s\n", tst_name); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct timespec now; + int status; + pid_t pid; + + if (argc > 1) { + char *endptr; + + ksft_cnt.ksft_pass = 1; + now.tv_sec = strtoul(argv[1], &endptr, 0); + if (*endptr != 0) + return pr_perror("strtoul"); + + return check("child after exec", &now); + } + + nscheck(); + + ksft_set_plan(4); + + clock_gettime(CLOCK_MONOTONIC, &now); + + if (unshare_timens()) + return 1; + + if (_settime(CLOCK_MONOTONIC, OFFSET)) + return 1; + + if (check("parent before vfork", &now)) + return 1; + + pid = vfork(); + if (pid < 0) + return pr_perror("fork"); + + if (pid == 0) { + char now_str[64]; + char *cargv[] = {"exec", now_str, NULL}; + char *cenv[] = {NULL}; + + /* Check for proper vvar offsets after execve. */ + snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET); + execve("/proc/self/exe", cargv, cenv); + pr_perror("execve"); + _exit(1); + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("waitpid"); + + if (status) + ksft_exit_fail(); + ksft_inc_pass_cnt(); + ksft_test_result_pass("wait for child\n"); + + /* Check that we are still in the source timens. */ + if (check("parent after vfork", &now)) + return 1; + + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile index 1a5db1eb8ed5..a9bf9459fb25 100644 --- a/tools/testing/selftests/tpm2/Makefile +++ b/tools/testing/selftests/tpm2/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) include ../lib.mk -TEST_PROGS := test_smoke.sh test_space.sh +TEST_PROGS := test_smoke.sh test_space.sh test_async.sh TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py diff --git a/tools/testing/selftests/tpm2/test_async.sh b/tools/testing/selftests/tpm2/test_async.sh new file mode 100755 index 000000000000..43bf5bd772fd --- /dev/null +++ b/tools/testing/selftests/tpm2/test_async.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +[ -e /dev/tpm0 ] || exit $ksft_skip +[ -e /dev/tpmrm0 ] || exit $ksft_skip + +python3 -m unittest -v tpm2_tests.AsyncTest diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 3e5ff29ee1dd..58af963e5b55 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -7,4 +7,3 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip python3 -m unittest -v tpm2_tests.SmokeTest -python3 -m unittest -v tpm2_tests.AsyncTest diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c index fc25ede131b8..1df5d057d79f 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c +++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c @@ -14,7 +14,11 @@ #include "../kselftest.h" #include "parse_vdso.h" +#if defined(__riscv) +const char *version = "LINUX_4.15"; +#else const char *version = "LINUX_2.6"; +#endif const char *name = "__vdso_getcpu"; struct getcpu_cache; diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index 8ccc73ed8240..e411f287a426 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ -27,6 +27,9 @@ #if defined(__aarch64__) const char *version = "LINUX_2.6.39"; const char *name = "__kernel_gettimeofday"; +#elif defined(__riscv) +const char *version = "LINUX_4.15"; +const char *name = "__vdso_gettimeofday"; #else const char *version = "LINUX_2.6"; const char *name = "__vdso_gettimeofday"; diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 7b9dc2426f18..1f8c36a9fa10 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +cow hugepage-mmap hugepage-mremap hugepage-shm @@ -33,3 +34,5 @@ memfd_secret soft-dirty split_huge_page_test ksm_tests +local_config.h +local_config.mk diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 163c2fde3cb3..89c14e41bd43 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests -LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h +LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h + +include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') @@ -25,7 +27,8 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES) LDLIBS = -lrt -lpthread -TEST_GEN_FILES = compaction_test +TEST_GEN_FILES = cow +TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugetlb-madvise @@ -52,6 +55,7 @@ TEST_GEN_FILES += userfaultfd TEST_GEN_PROGS += soft-dirty TEST_GEN_PROGS += split_huge_page_test TEST_GEN_FILES += ksm_tests +TEST_GEN_PROGS += ksm_functional_tests ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) @@ -95,7 +99,9 @@ TEST_FILES += va_128TBswitch.sh include ../lib.mk +$(OUTPUT)/cow: vm_util.c $(OUTPUT)/khugepaged: vm_util.c +$(OUTPUT)/ksm_functional_tests: vm_util.c $(OUTPUT)/madv_populate: vm_util.c $(OUTPUT)/soft-dirty: vm_util.c $(OUTPUT)/split_huge_page_test: vm_util.c @@ -150,8 +156,25 @@ warn_32bit_failure: endif endif +# cow_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. +$(OUTPUT)/cow: LDLIBS += $(COW_EXTRA_LIBS) + $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma + +local_config.mk local_config.h: check_config.sh + /bin/sh ./check_config.sh $(CC) + +EXTRA_CLEAN += local_config.mk local_config.h + +ifeq ($(COW_EXTRA_LIBS),) +all: warn_missing_liburing + +warn_missing_liburing: + @echo ; \ + echo "Warning: missing liburing support. Some COW tests will be skipped." ; \ + echo +endif diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh new file mode 100644 index 000000000000..bcba3af0acea --- /dev/null +++ b/tools/testing/selftests/vm/check_config.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Probe for libraries and create header files to record the results. Both C +# header files and Makefile include fragments are created. + +OUTPUT_H_FILE=local_config.h +OUTPUT_MKFILE=local_config.mk + +tmpname=$(mktemp) +tmpfile_c=${tmpname}.c +tmpfile_o=${tmpname}.o + +# liburing +echo "#include <sys/types.h>" > $tmpfile_c +echo "#include <liburing.h>" >> $tmpfile_c +echo "int func(void) { return 0; }" >> $tmpfile_c + +CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"} +$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1 + +if [ -f $tmpfile_o ]; then + echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE + echo "COW_EXTRA_LIBS = -luring" > $OUTPUT_MKFILE +else + echo "// No liburing support found" > $OUTPUT_H_FILE + echo "# No liburing support found, so:" > $OUTPUT_MKFILE + echo "COW_EXTRA_LIBS = " >> $OUTPUT_MKFILE +fi + +rm ${tmpname}.* diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c new file mode 100644 index 000000000000..26f6ea3079e2 --- /dev/null +++ b/tools/testing/selftests/vm/cow.c @@ -0,0 +1,1536 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * COW (Copy On Write) tests. + * + * Copyright 2022, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <dirent.h> +#include <assert.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/wait.h> +#include <linux/memfd.h> + +#include "local_config.h" +#ifdef LOCAL_CONFIG_HAVE_LIBURING +#include <liburing.h> +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + +#include "../../../../mm/gup_test.h" +#include "../kselftest.h" +#include "vm_util.h" + +static size_t pagesize; +static int pagemap_fd; +static size_t thpsize; +static int nr_hugetlbsizes; +static size_t hugetlbsizes[10]; +static int gup_fd; +static bool has_huge_zeropage; + +static void detect_thpsize(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", + O_RDONLY); + size_t size = 0; + char buf[15]; + int ret; + + if (fd < 0) + return; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + size = strtoul(buf, NULL, 10); + if (size < pagesize) + size = 0; + if (size > 0) { + thpsize = size; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", + thpsize / 1024); + } + } + + close(fd); +} + +static void detect_huge_zeropage(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", + O_RDONLY); + size_t enabled = 0; + char buf[15]; + int ret; + + if (fd < 0) + return; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + enabled = strtoul(buf, NULL, 10); + if (enabled == 1) { + has_huge_zeropage = true; + ksft_print_msg("[INFO] huge zeropage is enabled\n"); + } + } + + close(fd); +} + +static void detect_hugetlbsizes(void) +{ + DIR *dir = opendir("/sys/kernel/mm/hugepages/"); + + if (!dir) + return; + + while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) { + struct dirent *entry = readdir(dir); + size_t kb; + + if (!entry) + break; + if (entry->d_type != DT_DIR) + continue; + if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) + continue; + hugetlbsizes[nr_hugetlbsizes] = kb * 1024; + nr_hugetlbsizes++; + ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n", + kb); + } + closedir(dir); +} + +static bool range_is_swapped(void *addr, size_t size) +{ + for (; size; addr += pagesize, size -= pagesize) + if (!pagemap_is_swapped(pagemap_fd, addr)) + return false; + return true; +} + +struct comm_pipes { + int child_ready[2]; + int parent_ready[2]; +}; + +static int setup_comm_pipes(struct comm_pipes *comm_pipes) +{ + if (pipe(comm_pipes->child_ready) < 0) + return -errno; + if (pipe(comm_pipes->parent_ready) < 0) { + close(comm_pipes->child_ready[0]); + close(comm_pipes->child_ready[1]); + return -errno; + } + + return 0; +} + +static void close_comm_pipes(struct comm_pipes *comm_pipes) +{ + close(comm_pipes->child_ready[0]); + close(comm_pipes->child_ready[1]); + close(comm_pipes->parent_ready[0]); + close(comm_pipes->parent_ready[1]); +} + +static int child_memcmp_fn(char *mem, size_t size, + struct comm_pipes *comm_pipes) +{ + char *old = malloc(size); + char buf; + + /* Backup the original content. */ + memcpy(old, mem, size); + + /* Wait until the parent modified the page. */ + write(comm_pipes->child_ready[1], "0", 1); + while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) + ; + + /* See if we still read the old values. */ + return memcmp(old, mem, size); +} + +static int child_vmsplice_memcmp_fn(char *mem, size_t size, + struct comm_pipes *comm_pipes) +{ + struct iovec iov = { + .iov_base = mem, + .iov_len = size, + }; + ssize_t cur, total, transferred; + char *old, *new; + int fds[2]; + char buf; + + old = malloc(size); + new = malloc(size); + + /* Backup the original content. */ + memcpy(old, mem, size); + + if (pipe(fds) < 0) + return -errno; + + /* Trigger a read-only pin. */ + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred < 0) + return -errno; + if (transferred == 0) + return -EINVAL; + + /* Unmap it from our page tables. */ + if (munmap(mem, size) < 0) + return -errno; + + /* Wait until the parent modified it. */ + write(comm_pipes->child_ready[1], "0", 1); + while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) + ; + + /* See if we still read the old values via the pipe. */ + for (total = 0; total < transferred; total += cur) { + cur = read(fds[0], new + total, transferred - total); + if (cur < 0) + return -errno; + } + + return memcmp(old, new, transferred); +} + +typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); + +static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, + child_fn fn) +{ + struct comm_pipes comm_pipes; + char buf; + int ret; + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + return; + } + + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_comm_pipes; + } else if (!ret) { + exit(fn(mem, size, &comm_pipes)); + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + + if (do_mprotect) { + /* + * mprotect() optimizations might try avoiding + * write-faults by directly mapping pages writable. + */ + ret = mprotect(mem, size, PROT_READ); + ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + } + + /* Modify the page. */ + memset(mem, 0xff, size); + write(comm_pipes.parent_ready[1], "0", 1); + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + + ksft_test_result(!ret, "No leak from parent into child\n"); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +} + +static void test_cow_in_parent(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, false, child_memcmp_fn); +} + +static void test_cow_in_parent_mprotect(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, true, child_memcmp_fn); +} + +static void test_vmsplice_in_child(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn); +} + +static void test_vmsplice_in_child_mprotect(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn); +} + +static void do_test_vmsplice_in_parent(char *mem, size_t size, + bool before_fork) +{ + struct iovec iov = { + .iov_base = mem, + .iov_len = size, + }; + ssize_t cur, total, transferred; + struct comm_pipes comm_pipes; + char *old, *new; + int ret, fds[2]; + char buf; + + old = malloc(size); + new = malloc(size); + + memcpy(old, mem, size); + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + goto free; + } + + if (pipe(fds) < 0) { + ksft_test_result_fail("pipe() failed\n"); + goto close_comm_pipes; + } + + if (before_fork) { + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred <= 0) { + ksft_test_result_fail("vmsplice() failed\n"); + goto close_pipe; + } + } + + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_pipe; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + /* Modify page content in the child. */ + memset(mem, 0xff, size); + exit(0); + } + + if (!before_fork) { + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred <= 0) { + ksft_test_result_fail("vmsplice() failed\n"); + wait(&ret); + goto close_pipe; + } + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + if (munmap(mem, size) < 0) { + ksft_test_result_fail("munmap() failed\n"); + goto close_pipe; + } + write(comm_pipes.parent_ready[1], "0", 1); + + /* Wait until the child is done writing. */ + wait(&ret); + if (!WIFEXITED(ret)) { + ksft_test_result_fail("wait() failed\n"); + goto close_pipe; + } + + /* See if we still read the old values. */ + for (total = 0; total < transferred; total += cur) { + cur = read(fds[0], new + total, transferred - total); + if (cur < 0) { + ksft_test_result_fail("read() failed\n"); + goto close_pipe; + } + } + + ksft_test_result(!memcmp(old, new, transferred), + "No leak from child into parent\n"); +close_pipe: + close(fds[0]); + close(fds[1]); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +free: + free(old); + free(new); +} + +static void test_vmsplice_before_fork(char *mem, size_t size) +{ + do_test_vmsplice_in_parent(mem, size, true); +} + +static void test_vmsplice_after_fork(char *mem, size_t size) +{ + do_test_vmsplice_in_parent(mem, size, false); +} + +#ifdef LOCAL_CONFIG_HAVE_LIBURING +static void do_test_iouring(char *mem, size_t size, bool use_fork) +{ + struct comm_pipes comm_pipes; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct io_uring ring; + ssize_t cur, total; + struct iovec iov; + char *buf, *tmp; + int ret, fd; + FILE *file; + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + return; + } + + file = tmpfile(); + if (!file) { + ksft_test_result_fail("tmpfile() failed\n"); + goto close_comm_pipes; + } + fd = fileno(file); + assert(fd); + + tmp = malloc(size); + if (!tmp) { + ksft_test_result_fail("malloc() failed\n"); + goto close_file; + } + + /* Skip on errors, as we might just lack kernel support. */ + ret = io_uring_queue_init(1, &ring, 0); + if (ret < 0) { + ksft_test_result_skip("io_uring_queue_init() failed\n"); + goto free_tmp; + } + + /* + * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN + * | FOLL_LONGTERM the range. + * + * Skip on errors, as we might just lack kernel support or might not + * have sufficient MEMLOCK permissions. + */ + iov.iov_base = mem; + iov.iov_len = size; + ret = io_uring_register_buffers(&ring, &iov, 1); + if (ret) { + ksft_test_result_skip("io_uring_register_buffers() failed\n"); + goto queue_exit; + } + + if (use_fork) { + /* + * fork() and keep the child alive until we're done. Note that + * we expect the pinned page to not get shared with the child. + */ + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto unregister_buffers; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + exit(0); + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + } else { + /* + * Map the page R/O into the page table. Enable softdirty + * tracking to stop the page from getting mapped R/W immediately + * again by mprotect() optimizations. Note that we don't have an + * easy way to test if that worked (the pagemap does not export + * if the page is mapped R/O vs. R/W). + */ + ret = mprotect(mem, size, PROT_READ); + clear_softdirty(); + ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto unregister_buffers; + } + } + + /* + * Modify the page and write page content as observed by the fixed + * buffer pin to the file so we can verify it. + */ + memset(mem, 0xff, size); + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + ksft_test_result_fail("io_uring_get_sqe() failed\n"); + goto quit_child; + } + io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); + + ret = io_uring_submit(&ring); + if (ret < 0) { + ksft_test_result_fail("io_uring_submit() failed\n"); + goto quit_child; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + goto quit_child; + } + + if (cqe->res != size) { + ksft_test_result_fail("write_fixed failed\n"); + goto quit_child; + } + io_uring_cqe_seen(&ring, cqe); + + /* Read back the file content to the temporary buffer. */ + total = 0; + while (total < size) { + cur = pread(fd, tmp + total, size - total, total); + if (cur < 0) { + ksft_test_result_fail("pread() failed\n"); + goto quit_child; + } + total += cur; + } + + /* Finally, check if we read what we expected. */ + ksft_test_result(!memcmp(mem, tmp, size), + "Longterm R/W pin is reliable\n"); + +quit_child: + if (use_fork) { + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + } +unregister_buffers: + io_uring_unregister_buffers(&ring); +queue_exit: + io_uring_queue_exit(&ring); +free_tmp: + free(tmp); +close_file: + fclose(file); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +} + +static void test_iouring_ro(char *mem, size_t size) +{ + do_test_iouring(mem, size, false); +} + +static void test_iouring_fork(char *mem, size_t size) +{ + do_test_iouring(mem, size, true); +} + +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + +enum ro_pin_test { + RO_PIN_TEST, + RO_PIN_TEST_SHARED, + RO_PIN_TEST_PREVIOUSLY_SHARED, + RO_PIN_TEST_RO_EXCLUSIVE, +}; + +static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, + bool fast) +{ + struct pin_longterm_test args; + struct comm_pipes comm_pipes; + char *tmp, buf; + __u64 tmp_val; + int ret; + + if (gup_fd < 0) { + ksft_test_result_skip("gup_test not available\n"); + return; + } + + tmp = malloc(size); + if (!tmp) { + ksft_test_result_fail("malloc() failed\n"); + return; + } + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + goto free_tmp; + } + + switch (test) { + case RO_PIN_TEST: + break; + case RO_PIN_TEST_SHARED: + case RO_PIN_TEST_PREVIOUSLY_SHARED: + /* + * Share the pages with our child. As the pages are not pinned, + * this should just work. + */ + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_comm_pipes; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + exit(0); + } + + /* Wait until our child is ready. */ + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + + if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { + /* + * Tell the child to quit now and wait until it quit. + * The pages should now be mapped R/O into our page + * tables, but they are no longer shared. + */ + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + if (!WIFEXITED(ret)) + ksft_print_msg("[INFO] wait() failed\n"); + } + break; + case RO_PIN_TEST_RO_EXCLUSIVE: + /* + * Map the page R/O into the page table. Enable softdirty + * tracking to stop the page from getting mapped R/W immediately + * again by mprotect() optimizations. Note that we don't have an + * easy way to test if that worked (the pagemap does not export + * if the page is mapped R/O vs. R/W). + */ + ret = mprotect(mem, size, PROT_READ); + clear_softdirty(); + ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto close_comm_pipes; + } + break; + default: + assert(false); + } + + /* Take a R/O pin. This should trigger unsharing. */ + args.addr = (__u64)(uintptr_t)mem; + args.size = size; + args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); + if (ret) { + if (errno == EINVAL) + ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + else + ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + goto wait; + } + + /* Modify the page. */ + memset(mem, 0xff, size); + + /* + * Read back the content via the pin to the temporary buffer and + * test if we observed the modification. + */ + tmp_val = (__u64)(uintptr_t)tmp; + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); + if (ret) + ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); + else + ksft_test_result(!memcmp(mem, tmp, size), + "Longterm R/O pin is reliable\n"); + + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); + if (ret) + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); +wait: + switch (test) { + case RO_PIN_TEST_SHARED: + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + if (!WIFEXITED(ret)) + ksft_print_msg("[INFO] wait() failed\n"); + break; + default: + break; + } +close_comm_pipes: + close_comm_pipes(&comm_pipes); +free_tmp: + free(tmp); +} + +static void test_ro_pin_on_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); +} + +static void test_ro_fast_pin_on_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); +} + +static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); +} + +static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); +} + +static void test_ro_pin_on_ro_exclusive(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); +} + +static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); +} + +typedef void (*test_fn)(char *mem, size_t size); + +static void do_run_with_base_page(test_fn fn, bool swapout) +{ + char *mem; + int ret; + + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); + /* Ignore if not around on a kernel. */ + if (ret && errno != EINVAL) { + ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + goto munmap; + } + + /* Populate a base page. */ + memset(mem, 0, pagesize); + + if (swapout) { + madvise(mem, pagesize, MADV_PAGEOUT); + if (!pagemap_is_swapped(pagemap_fd, mem)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto munmap; + } + } + + fn(mem, pagesize); +munmap: + munmap(mem, pagesize); +} + +static void run_with_base_page(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with base page\n", desc); + do_run_with_base_page(fn, false); +} + +static void run_with_base_page_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + do_run_with_base_page(fn, true); +} + +enum thp_run { + THP_RUN_PMD, + THP_RUN_PMD_SWAPOUT, + THP_RUN_PTE, + THP_RUN_PTE_SWAPOUT, + THP_RUN_SINGLE_PTE, + THP_RUN_SINGLE_PTE_SWAPOUT, + THP_RUN_PARTIAL_MREMAP, + THP_RUN_PARTIAL_SHARED, +}; + +static void do_run_with_thp(test_fn fn, enum thp_run thp_run) +{ + char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; + size_t size, mmap_size, mremap_size; + int ret; + + /* For alignment purposes, we need twice the thp size. */ + mmap_size = 2 * thpsize; + mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); + + ret = madvise(mem, thpsize, MADV_HUGEPAGE); + if (ret) { + ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + goto munmap; + } + + /* + * Try to populate a THP. Touch the first sub-page and test if we get + * another sub-page populated automatically. + */ + mem[0] = 0; + if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { + ksft_test_result_skip("Did not get a THP populated\n"); + goto munmap; + } + memset(mem, 0, thpsize); + + size = thpsize; + switch (thp_run) { + case THP_RUN_PMD: + case THP_RUN_PMD_SWAPOUT: + break; + case THP_RUN_PTE: + case THP_RUN_PTE_SWAPOUT: + /* + * Trigger PTE-mapping the THP by temporarily mapping a single + * subpage R/O. + */ + ret = mprotect(mem + pagesize, pagesize, PROT_READ); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto munmap; + } + ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto munmap; + } + break; + case THP_RUN_SINGLE_PTE: + case THP_RUN_SINGLE_PTE_SWAPOUT: + /* + * Discard all but a single subpage of that PTE-mapped THP. What + * remains is a single PTE mapping a single subpage. + */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); + if (ret) { + ksft_test_result_fail("MADV_DONTNEED failed\n"); + goto munmap; + } + size = pagesize; + break; + case THP_RUN_PARTIAL_MREMAP: + /* + * Remap half of the THP. We need some new memory location + * for that. + */ + mremap_size = thpsize / 2; + mremap_mem = mmap(NULL, mremap_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + tmp = mremap(mem + mremap_size, mremap_size, mremap_size, + MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); + if (tmp != mremap_mem) { + ksft_test_result_fail("mremap() failed\n"); + goto munmap; + } + size = mremap_size; + break; + case THP_RUN_PARTIAL_SHARED: + /* + * Share the first page of the THP with a child and quit the + * child. This will result in some parts of the THP never + * have been shared. + */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); + if (ret) { + ksft_test_result_fail("MADV_DONTFORK failed\n"); + goto munmap; + } + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto munmap; + } else if (!ret) { + exit(0); + } + wait(&ret); + /* Allow for sharing all pages again. */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); + if (ret) { + ksft_test_result_fail("MADV_DOFORK failed\n"); + goto munmap; + } + break; + default: + assert(false); + } + + switch (thp_run) { + case THP_RUN_PMD_SWAPOUT: + case THP_RUN_PTE_SWAPOUT: + case THP_RUN_SINGLE_PTE_SWAPOUT: + madvise(mem, size, MADV_PAGEOUT); + if (!range_is_swapped(mem, size)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto munmap; + } + break; + default: + break; + } + + fn(mem, size); +munmap: + munmap(mmap_mem, mmap_size); + if (mremap_mem != MAP_FAILED) + munmap(mremap_mem, mremap_size); +} + +static void run_with_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with THP\n", desc); + do_run_with_thp(fn, THP_RUN_PMD); +} + +static void run_with_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); + do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); +} + +static void run_with_pte_mapped_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); + do_run_with_thp(fn, THP_RUN_PTE); +} + +static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); + do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); +} + +static void run_with_single_pte_of_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE); +} + +static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); +} + +static void run_with_partial_mremap_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); + do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); +} + +static void run_with_partial_shared_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); + do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); +} + +static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) +{ + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; + char *mem, *dummy; + + ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + hugetlbsize / 1024); + + flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; + + mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + return; + } + + /* Populate an huge page. */ + memset(mem, 0, hugetlbsize); + + /* + * We need a total of two hugetlb pages to handle COW/unsharing + * properly, otherwise we might get zapped by a SIGBUS. + */ + dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); + if (dummy == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + goto munmap; + } + munmap(dummy, hugetlbsize); + + fn(mem, hugetlbsize); +munmap: + munmap(mem, hugetlbsize); +} + +struct test_case { + const char *desc; + test_fn fn; +}; + +/* + * Test cases that are specific to anonymous pages: pages in private mappings + * that may get shared via COW during fork(). + */ +static const struct test_case anon_test_cases[] = { + /* + * Basic COW tests for fork() without any GUP. If we miss to break COW, + * either the child can observe modifications by the parent or the + * other way around. + */ + { + "Basic COW after fork()", + test_cow_in_parent, + }, + /* + * Basic test, but do an additional mprotect(PROT_READ)+ + * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. + */ + { + "Basic COW after fork() with mprotect() optimization", + test_cow_in_parent_mprotect, + }, + /* + * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If + * we miss to break COW, the child observes modifications by the parent. + * This is CVE-2020-29374 reported by Jann Horn. + */ + { + "vmsplice() + unmap in child", + test_vmsplice_in_child + }, + /* + * vmsplice() test, but do an additional mprotect(PROT_READ)+ + * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. + */ + { + "vmsplice() + unmap in child with mprotect() optimization", + test_vmsplice_in_child_mprotect + }, + /* + * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after + * fork(); modify in the child. If we miss to break COW, the parent + * observes modifications by the child. + */ + { + "vmsplice() before fork(), unmap in parent after fork()", + test_vmsplice_before_fork, + }, + /* + * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the + * child. If we miss to break COW, the parent observes modifications by + * the child. + */ + { + "vmsplice() + unmap in parent after fork()", + test_vmsplice_after_fork, + }, +#ifdef LOCAL_CONFIG_HAVE_LIBURING + /* + * Take a R/W longterm pin and then map the page R/O into the page + * table to trigger a write fault on next access. When modifying the + * page, the page content must be visible via the pin. + */ + { + "R/O-mapping a page registered as iouring fixed buffer", + test_iouring_ro, + }, + /* + * Take a R/W longterm pin and then fork() a child. When modifying the + * page, the page content must be visible via the pin. We expect the + * pinned page to not get shared with the child. + */ + { + "fork() with an iouring fixed buffer", + test_iouring_fork, + }, + +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + /* + * Take a R/O longterm pin on a R/O-mapped shared anonymous page. + * When modifying the page via the page table, the page content change + * must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped shared page", + test_ro_pin_on_shared, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped shared page", + test_ro_fast_pin_on_shared, + }, + /* + * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that + * was previously shared. When modifying the page via the page table, + * the page content change must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped previously-shared page", + test_ro_pin_on_ro_previously_shared, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped previously-shared page", + test_ro_fast_pin_on_ro_previously_shared, + }, + /* + * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. + * When modifying the page via the page table, the page content change + * must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped exclusive page", + test_ro_pin_on_ro_exclusive, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped exclusive page", + test_ro_fast_pin_on_ro_exclusive, + }, +}; + +static void run_anon_test_case(struct test_case const *test_case) +{ + int i; + + run_with_base_page(test_case->fn, test_case->desc); + run_with_base_page_swap(test_case->fn, test_case->desc); + if (thpsize) { + run_with_thp(test_case->fn, test_case->desc); + run_with_thp_swap(test_case->fn, test_case->desc); + run_with_pte_mapped_thp(test_case->fn, test_case->desc); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); + run_with_single_pte_of_thp(test_case->fn, test_case->desc); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); + run_with_partial_mremap_thp(test_case->fn, test_case->desc); + run_with_partial_shared_thp(test_case->fn, test_case->desc); + } + for (i = 0; i < nr_hugetlbsizes; i++) + run_with_hugetlb(test_case->fn, test_case->desc, + hugetlbsizes[i]); +} + +static void run_anon_test_cases(void) +{ + int i; + + ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); + + for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) + run_anon_test_case(&anon_test_cases[i]); +} + +static int tests_per_anon_test_case(void) +{ + int tests = 2 + nr_hugetlbsizes; + + if (thpsize) + tests += 8; + return tests; +} + +typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); + +static void test_cow(char *mem, const char *smem, size_t size) +{ + char *old = malloc(size); + + /* Backup the original content. */ + memcpy(old, smem, size); + + /* Modify the page. */ + memset(mem, 0xff, size); + + /* See if we still read the old values via the other mapping. */ + ksft_test_result(!memcmp(smem, old, size), + "Other mapping not modified\n"); + free(old); +} + +static void test_ro_pin(char *mem, const char *smem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST, false); +} + +static void test_ro_fast_pin(char *mem, const char *smem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST, true); +} + +static void run_with_zeropage(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + + ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Read from the page to populate the shared zeropage. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +} + +static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, *mmap_mem, *mmap_smem, tmp; + size_t mmap_size; + int ret; + + ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + + if (!has_huge_zeropage) { + ksft_test_result_skip("Huge zeropage not enabled\n"); + return; + } + + /* For alignment purposes, we need twice the thp size. */ + mmap_size = 2 * thpsize; + mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + mmap_smem = mmap(NULL, mmap_size, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_smem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); + smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); + + ret = madvise(mem, thpsize, MADV_HUGEPAGE); + ret |= madvise(smem, thpsize, MADV_HUGEPAGE); + if (ret) { + ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + goto munmap; + } + + /* + * Read from the memory to populate the huge shared zeropage. Read from + * the first sub-page and test if we get another sub-page populated + * automatically. + */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || + !pagemap_is_populated(pagemap_fd, smem + pagesize)) { + ksft_test_result_skip("Did not get THPs populated\n"); + goto munmap; + } + + fn(mem, smem, thpsize); +munmap: + munmap(mmap_mem, mmap_size); + if (mmap_smem != MAP_FAILED) + munmap(mmap_smem, mmap_size); +} + +static void run_with_memfd(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + int fd; + + ksft_print_msg("[RUN] %s ... with memfd\n", desc); + + fd = memfd_create("test", 0); + if (fd < 0) { + ksft_test_result_fail("memfd_create() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, pagesize)) { + ksft_test_result_fail("fallocate() failed\n"); + goto close; + } + + /* Create a private mapping of the memfd. */ + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto close; + } + smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +close: + close(fd); +} + +static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + FILE *file; + int fd; + + ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + + file = tmpfile(); + if (!file) { + ksft_test_result_fail("tmpfile() failed\n"); + return; + } + + fd = fileno(file); + if (fd < 0) { + ksft_test_result_skip("fileno() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, pagesize)) { + ksft_test_result_fail("fallocate() failed\n"); + goto close; + } + + /* Create a private mapping of the memfd. */ + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto close; + } + smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +close: + fclose(file); +} + +static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, + size_t hugetlbsize) +{ + int flags = MFD_HUGETLB; + char *mem, *smem, tmp; + int fd; + + ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + hugetlbsize / 1024); + + flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; + + fd = memfd_create("test", flags); + if (fd < 0) { + ksft_test_result_skip("memfd_create() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, hugetlbsize)) { + ksft_test_result_skip("need more free huge pages\n"); + goto close; + } + + /* Create a private mapping of the memfd. */ + mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, + 0); + if (mem == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + goto close; + } + smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, hugetlbsize); +munmap: + munmap(mem, hugetlbsize); + if (mem != MAP_FAILED) + munmap(smem, hugetlbsize); +close: + close(fd); +} + +struct non_anon_test_case { + const char *desc; + non_anon_test_fn fn; +}; + +/* + * Test cases that target any pages in private mappings that are not anonymous: + * pages that may get shared via COW ndependent of fork(). This includes + * the shared zeropage(s), pagecache pages, ... + */ +static const struct non_anon_test_case non_anon_test_cases[] = { + /* + * Basic COW test without any GUP. If we miss to break COW, changes are + * visible via other private/shared mappings. + */ + { + "Basic COW", + test_cow, + }, + /* + * Take a R/O longterm pin. When modifying the page via the page table, + * the page content change must be visible via the pin. + */ + { + "R/O longterm GUP pin", + test_ro_pin, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O longterm GUP-fast pin", + test_ro_fast_pin, + }, +}; + +static void run_non_anon_test_case(struct non_anon_test_case const *test_case) +{ + int i; + + run_with_zeropage(test_case->fn, test_case->desc); + run_with_memfd(test_case->fn, test_case->desc); + run_with_tmpfile(test_case->fn, test_case->desc); + if (thpsize) + run_with_huge_zeropage(test_case->fn, test_case->desc); + for (i = 0; i < nr_hugetlbsizes; i++) + run_with_memfd_hugetlb(test_case->fn, test_case->desc, + hugetlbsizes[i]); +} + +static void run_non_anon_test_cases(void) +{ + int i; + + ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); + + for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) + run_non_anon_test_case(&non_anon_test_cases[i]); +} + +static int tests_per_non_anon_test_case(void) +{ + int tests = 3 + nr_hugetlbsizes; + + if (thpsize) + tests += 1; + return tests; +} + +int main(int argc, char **argv) +{ + int err; + + pagesize = getpagesize(); + detect_thpsize(); + detect_hugetlbsizes(); + detect_huge_zeropage(); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); + + gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + + run_anon_test_cases(); + run_non_anon_test_cases(); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c index 93f9e7b81331..955ef87f382c 100644 --- a/tools/testing/selftests/vm/hugepage-mmap.c +++ b/tools/testing/selftests/vm/hugepage-mmap.c @@ -16,14 +16,13 @@ * range. * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. */ - +#define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> -#define FILE_NAME "huge/hugepagefile" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -67,16 +66,16 @@ int main(void) void *addr; int fd, ret; - fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); + fd = memfd_create("hugepage-mmap", MFD_HUGETLB); if (fd < 0) { - perror("Open failed"); + perror("memfd_create() failed"); exit(1); } addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); if (addr == MAP_FAILED) { perror("mmap"); - unlink(FILE_NAME); + close(fd); exit(1); } @@ -87,7 +86,6 @@ int main(void) munmap(addr, LENGTH); close(fd); - unlink(FILE_NAME); return ret; } diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index e63a0214f639..e53b5eaa8fce 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -22,6 +22,7 @@ #include <sys/syscall.h> /* Definition of SYS_* constants */ #include <linux/userfaultfd.h> #include <sys/ioctl.h> +#include <string.h> #define DEFAULT_LENGTH_MB 10UL #define MB_TO_BYTES(x) (x * 1024 * 1024) @@ -108,26 +109,23 @@ static void register_region_with_uffd(char *addr, size_t len) int main(int argc, char *argv[]) { size_t length = 0; + int ret = 0, fd; - if (argc != 2 && argc != 3) { - printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]); + if (argc >= 2 && !strcmp(argv[1], "-h")) { + printf("Usage: %s [length_in_MB]\n", argv[0]); exit(1); } /* Read memory length as the first arg if valid, otherwise fallback to * the default length. */ - if (argc == 3) - length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL; + if (argc >= 2) + length = (size_t)atoi(argv[1]); + else + length = DEFAULT_LENGTH_MB; - length = length > 0 ? length : DEFAULT_LENGTH_MB; length = MB_TO_BYTES(length); - - int ret = 0; - - /* last arg is the hugetlb file name */ - int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755); - + fd = memfd_create(argv[0], MFD_HUGETLB); if (fd < 0) { perror("Open failed"); exit(1); @@ -185,7 +183,6 @@ int main(int argc, char *argv[]) } close(fd); - unlink(argv[argc-1]); return ret; } diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c index 3c9943131881..a634f47d1e56 100644 --- a/tools/testing/selftests/vm/hugetlb-madvise.c +++ b/tools/testing/selftests/vm/hugetlb-madvise.c @@ -12,6 +12,7 @@ * directory. */ +#define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> #include <unistd.h> @@ -19,7 +20,6 @@ #define __USE_GNU #include <fcntl.h> -#define USAGE "USAGE: %s <hugepagefile_name>\n" #define MIN_FREE_PAGES 20 #define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */ @@ -103,11 +103,6 @@ int main(int argc, char **argv) int fd; int ret; - if (argc != 2) { - printf(USAGE, argv[0]); - exit(1); - } - huge_page_size = default_huge_page_size(); if (!huge_page_size) { printf("Unable to determine huge page size, exiting!\n"); @@ -125,9 +120,9 @@ int main(int argc, char **argv) exit(1); } - fd = open(argv[1], O_CREAT | O_RDWR, 0755); + fd = memfd_create(argv[0], MFD_HUGETLB); if (fd < 0) { - perror("Open failed"); + perror("memfd_create() failed"); exit(1); } @@ -200,7 +195,7 @@ int main(int argc, char **argv) exit(1); } - /* addr + length should be aligned up to huge page size */ + /* addr + length should be aligned down to huge page size */ if (madvise(addr, ((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size, MADV_DONTNEED)) { @@ -208,10 +203,11 @@ int main(int argc, char **argv) exit(1); } - /* should free all pages in mapping */ - validate_free_pages(free_hugepages); + /* should free all but last page in mapping */ + validate_free_pages(free_hugepages - 1); (void)munmap(addr, NR_HUGE_PAGES * huge_page_size); + validate_free_pages(free_hugepages); /* * Test MADV_DONTNEED on anonymous private mapping @@ -406,6 +402,5 @@ int main(int argc, char **argv) (void)munmap(addr2, NR_HUGE_PAGES * huge_page_size); close(fd); - unlink(argv[1]); return 0; } diff --git a/tools/testing/selftests/vm/ksm_functional_tests.c b/tools/testing/selftests/vm/ksm_functional_tests.c new file mode 100644 index 000000000000..b11b7e5115dc --- /dev/null +++ b/tools/testing/selftests/vm/ksm_functional_tests.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KSM functional tests + * + * Copyright 2022, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <linux/userfaultfd.h> + +#include "../kselftest.h" +#include "vm_util.h" + +#define KiB 1024u +#define MiB (1024 * KiB) + +static int ksm_fd; +static int ksm_full_scans_fd; +static int pagemap_fd; +static size_t pagesize; + +static bool range_maps_duplicates(char *addr, unsigned long size) +{ + unsigned long offs_a, offs_b, pfn_a, pfn_b; + + /* + * There is no easy way to check if there are KSM pages mapped into + * this range. We only check that the range does not map the same PFN + * twice by comaring each pair of mapped pages. + */ + for (offs_a = 0; offs_a < size; offs_a += pagesize) { + pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a); + /* Page not present or PFN not exposed by the kernel. */ + if (pfn_a == -1ul || !pfn_a) + continue; + + for (offs_b = offs_a + pagesize; offs_b < size; + offs_b += pagesize) { + pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b); + if (pfn_b == -1ul || !pfn_b) + continue; + if (pfn_a == pfn_b) + return true; + } + } + return false; +} + +static long ksm_get_full_scans(void) +{ + char buf[10]; + ssize_t ret; + + ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +static int ksm_merge(void) +{ + long start_scans, end_scans; + + /* Wait for two full scans such that any possible merging happened. */ + start_scans = ksm_get_full_scans(); + if (start_scans < 0) + return start_scans; + if (write(ksm_fd, "1", 1) != 1) + return -errno; + do { + end_scans = ksm_get_full_scans(); + if (end_scans < 0) + return end_scans; + } while (end_scans < start_scans + 2); + + return 0; +} + +static char *mmap_and_merge_range(char val, unsigned long size) +{ + char *map; + + map = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (map == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return MAP_FAILED; + } + + /* Don't use THP. Ignore if THP are not around on a kernel. */ + if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) { + ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + goto unmap; + } + + /* Make sure each page contains the same values to merge them. */ + memset(map, val, size); + if (madvise(map, size, MADV_MERGEABLE)) { + ksft_test_result_fail("MADV_MERGEABLE failed\n"); + goto unmap; + } + + /* Run KSM to trigger merging and wait. */ + if (ksm_merge()) { + ksft_test_result_fail("Running KSM failed\n"); + goto unmap; + } + return map; +unmap: + munmap(map, size); + return MAP_FAILED; +} + +static void test_unmerge(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + +static void test_unmerge_discarded(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + /* Discard half of all mapped pages so we have pte_none() entries. */ + if (madvise(map, size / 2, MADV_DONTNEED)) { + ksft_test_result_fail("MADV_DONTNEED failed\n"); + goto unmap; + } + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + +#ifdef __NR_userfaultfd +static void test_unmerge_uffd_wp(void) +{ + struct uffdio_writeprotect uffd_writeprotect; + struct uffdio_register uffdio_register; + const unsigned int size = 2 * MiB; + struct uffdio_api uffdio_api; + char *map; + int uffd; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + /* See if UFFD is around. */ + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + ksft_test_result_skip("__NR_userfaultfd failed\n"); + goto unmap; + } + + /* See if UFFD-WP is around. */ + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { + ksft_test_result_fail("UFFDIO_API failed\n"); + goto close_uffd; + } + if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { + ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n"); + goto close_uffd; + } + + /* Register UFFD-WP, no need for an actual handler. */ + uffdio_register.range.start = (unsigned long) map; + uffdio_register.range.len = size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) { + ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n"); + goto close_uffd; + } + + /* Write-protect the range using UFFD-WP. */ + uffd_writeprotect.range.start = (unsigned long) map; + uffd_writeprotect.range.len = size; + uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP; + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { + ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n"); + goto close_uffd; + } + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto close_uffd; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +close_uffd: + close(uffd); +unmap: + munmap(map, size); +} +#endif + +int main(int argc, char **argv) +{ + unsigned int tests = 2; + int err; + +#ifdef __NR_userfaultfd + tests++; +#endif + + ksft_print_header(); + ksft_set_plan(tests); + + pagesize = getpagesize(); + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + + test_unmerge(); + test_unmerge_discarded(); +#ifdef __NR_userfaultfd + test_unmerge_uffd_wp(); +#endif + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0d85be2350fa..f9eb4d67e0dd 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -40,6 +40,7 @@ enum ksm_test_name { CHECK_KSM_NUMA_MERGE, KSM_MERGE_TIME, KSM_MERGE_TIME_HUGE_PAGES, + KSM_UNMERGE_TIME, KSM_COW_TIME }; @@ -108,7 +109,10 @@ static void print_help(void) " -P evaluate merging time and speed.\n" " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" - " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " For this test, the size of duplicated memory area (in MiB)\n" + " must be provided using -s option\n" + " -D evaluate unmerging time and speed when disabling KSM.\n" " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" " -C evaluate the time required to break COW of merged pages.\n\n"); @@ -188,6 +192,16 @@ static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, return 0; } +static int ksm_unmerge_pages(void *addr, size_t size, + struct timespec start_time, int timeout) +{ + if (madvise(addr, size, MADV_UNMERGEABLE)) { + perror("madvise"); + return 1; + } + return 0; +} + static bool assert_ksm_pages_count(long dupl_page_count) { unsigned long max_page_sharing, pages_sharing, pages_shared; @@ -560,6 +574,53 @@ err_out: return KSFT_FAIL; } +static int ksm_unmerge_time(int mapping, int prot, int timeout, size_t map_size) +{ + void *map_ptr; + struct timespec start_time, end_time; + unsigned long scan_time_ns; + + map_size *= MB; + + map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size); + if (!map_ptr) + return KSFT_FAIL; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_merge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_unmerge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + goto err_out; + } + + scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Total size: %lu MiB\n", map_size / MB); + printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC, + scan_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n", (map_size / MB) / + ((double)scan_time_ns / NSEC_PER_SEC)); + + munmap(map_ptr, map_size); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr, map_size); + return KSFT_FAIL; +} + static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size) { void *map_ptr; @@ -644,7 +705,7 @@ int main(int argc, char *argv[]) bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; long size_MB = 0; - while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCH")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCHD")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -701,6 +762,9 @@ int main(int argc, char *argv[]) case 'H': test_name = KSM_MERGE_TIME_HUGE_PAGES; break; + case 'D': + test_name = KSM_UNMERGE_TIME; + break; case 'C': test_name = KSM_COW_TIME; break; @@ -762,6 +826,14 @@ int main(int argc, char *argv[]) ret = ksm_merge_hugepages_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, size_MB); break; + case KSM_UNMERGE_TIME: + if (size_MB == 0) { + printf("Option '-s' is required.\n"); + return KSFT_FAIL; + } + ret = ksm_unmerge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, + ksm_scan_limit_sec, size_MB); + break; case KSM_COW_TIME: ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, page_size); diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index 715a42e8e2cd..262eae6b58f2 100644 --- a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -20,6 +20,13 @@ #include "../kselftest.h" #include "vm_util.h" +#ifndef MADV_POPULATE_READ +#define MADV_POPULATE_READ 22 +#endif /* MADV_POPULATE_READ */ +#ifndef MADV_POPULATE_WRITE +#define MADV_POPULATE_WRITE 23 +#endif /* MADV_POPULATE_WRITE */ + /* * For now, we're using 2 MiB of private anonymous memory for all tests. */ @@ -27,14 +34,6 @@ static size_t pagesize; -static bool pagemap_is_populated(int fd, char *start) -{ - uint64_t entry = pagemap_get_entry(fd, start); - - /* Present or swapped. */ - return entry & 0xc000000000000000ull; -} - static void sense_support(void) { char *addr; diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index b078ce9c6d2a..72c14cd3ddc7 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void) return 1; } +static inline int cpu_max_xsave_size(void) +{ + unsigned long XSTATE_CPUID = 0xd; + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx); + return ecx; +} + static inline u32 pkey_bit_position(int pkey) { return pkey * PKEY_BITS_PER_PKEY; diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 291bc1e07842..95f403a0c46d 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -18,12 +18,13 @@ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ #include <errno.h> +#include <linux/elf.h> #include <linux/futex.h> #include <time.h> #include <sys/time.h> @@ -1550,6 +1551,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); } +#if defined(__i386__) || defined(__x86_64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + u32 new_pkru; + pid_t child; + int status, ret; + int pkey_offset = pkey_reg_xstate_offset(); + size_t xsave_size = cpu_max_xsave_size(); + void *xsave; + u32 *pkey_register; + u64 *xstate_bv; + struct iovec iov; + + new_pkru = ~read_pkey_reg(); + /* Don't make PROT_EXEC mappings inaccessible */ + new_pkru &= ~3; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkru) + exit(1); + + /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ + raise(SIGSTOP); + + if (__read_pkey_reg() != 0) + exit(1); + + /* Stop and allow the tracer to examine PKRU */ + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + xsave = (void *)malloc(xsave_size); + pkey_assert(xsave > 0); + + /* Modify the PKRU register directly */ + iov.iov_base = xsave; + iov.iov_len = xsave_size; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + pkey_register = (u32 *)(xsave + pkey_offset); + pkey_assert(*pkey_register == read_pkey_reg()); + + *pkey_register = new_pkru; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Clear the PKRU bit from XSTATE_BV */ + xstate_bv = (u64 *)(xsave + 512); + *xstate_bv &= ~(1 << 9); + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value go to 0 */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); + free(xsave); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1585,6 +1709,9 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, +#if defined(__i386__) || defined(__x86_64__) + test_ptrace_modifies_pkru, +#endif }; void run_tests_once(void) diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index e780e76c26b8..8984e0bb58c7 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -1,14 +1,88 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -#please run as root +# Please run as root # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -mnt=./huge exitcode=0 -#get huge pagesize and freepages from /proc/meminfo +usage() { + cat <<EOF +usage: ${BASH_SOURCE[0]:-$0} [ -h | -t "<categories>"] + -t: specify specific categories to tests to run + -h: display this message + +The default behavior is to run all tests. + +Alternatively, specific groups tests can be run by passing a string +to the -t argument containing one or more of the following categories +separated by spaces: +- mmap + tests for mmap(2) +- gup_test + tests for gup using gup_test interface +- userfaultfd + tests for userfaultfd(2) +- compaction + a test for the patch "Allow compaction of unevictable pages" +- mlock + tests for mlock(2) +- mremap + tests for mremap(2) +- hugevm + tests for very large virtual address space +- vmalloc + vmalloc smoke tests +- hmm + hmm smoke tests +- madv_populate + test memadvise(2) MADV_POPULATE_{READ,WRITE} options +- memfd_secret + test memfd_secret(2) +- process_mrelease + test process_mrelease(2) +- ksm + ksm tests that do not require >=2 NUMA nodes +- ksm_numa + ksm tests that require >=2 NUMA nodes +- pkey + memory protection key tests +- soft_dirty + test soft dirty page bit semantics +- cow + test copy-on-write semantics +example: ./run_vmtests.sh -t "hmm mmap ksm" +EOF + exit 0 +} + + +while getopts "ht:" OPT; do + case ${OPT} in + "h") usage ;; + "t") VM_SELFTEST_ITEMS=${OPTARG} ;; + esac +done +shift $((OPTIND -1)) + +# default behavior: run all tests +VM_SELFTEST_ITEMS=${VM_SELFTEST_ITEMS:-default} + +test_selected() { + if [ "$VM_SELFTEST_ITEMS" == "default" ]; then + # If no VM_SELFTEST_ITEMS are specified, run all tests + return 0 + fi + # If test selected argument is one of the test items + if [[ " ${VM_SELFTEST_ITEMS[*]} " =~ " ${1} " ]]; then + return 0 + else + return 1 + fi +} + +# get huge pagesize and freepages from /proc/meminfo while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs="$size" @@ -28,7 +102,7 @@ hpgsize_MB=$((hpgsize_KB / 1024)) half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) needmem_KB=$((half_ufd_size_MB * 2 * 1024)) -#set proper nr_hugepages +# set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / hpgsize_KB)) @@ -57,144 +131,144 @@ else exit 1 fi -#filter 64bit architectures +# filter 64bit architectures ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" if [ -z "$ARCH" ]; then ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi VADDR64=0 -echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1 +echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1 # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { - local title="running $*" - local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) - printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" - - "$@" - local ret=$? - if [ $ret -eq 0 ]; then - echo "[PASS]" - elif [ $ret -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip - else - echo "[FAIL]" - exitcode=1 - fi + if test_selected ${CATEGORY}; then + local title="running $*" + local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) + printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" + + "$@" + local ret=$? + if [ $ret -eq 0 ]; then + echo "[PASS]" + elif [ $ret -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip + else + echo "[FAIL]" + exitcode=1 + fi + fi # test_selected } -mkdir "$mnt" -mount -t hugetlbfs none "$mnt" - -run_test ./hugepage-mmap +CATEGORY="hugetlb" run_test ./hugepage-mmap shmmax=$(cat /proc/sys/kernel/shmmax) shmall=$(cat /proc/sys/kernel/shmall) echo 268435456 > /proc/sys/kernel/shmmax echo 4194304 > /proc/sys/kernel/shmall -run_test ./hugepage-shm +CATEGORY="hugetlb" run_test ./hugepage-shm echo "$shmmax" > /proc/sys/kernel/shmmax echo "$shmall" > /proc/sys/kernel/shmall -run_test ./map_hugetlb - -run_test ./hugepage-mremap "$mnt"/huge_mremap -rm -f "$mnt"/huge_mremap - -run_test ./hugepage-vmemmap +CATEGORY="hugetlb" run_test ./map_hugetlb +CATEGORY="hugetlb" run_test ./hugepage-mremap +CATEGORY="hugetlb" run_test ./hugepage-vmemmap +CATEGORY="hugetlb" run_test ./hugetlb-madvise -run_test ./hugetlb-madvise "$mnt"/madvise-test -rm -f "$mnt"/madvise-test - -echo "NOTE: The above hugetlb tests provide minimal coverage. Use" -echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" -echo " hugetlb regression testing." +if test_selected "hugetlb"; then + echo "NOTE: These hugetlb tests provide minimal coverage. Use" + echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" + echo " hugetlb regression testing." +fi -run_test ./map_fixed_noreplace +CATEGORY="mmap" run_test ./map_fixed_noreplace # get_user_pages_fast() benchmark -run_test ./gup_test -u +CATEGORY="gup_test" run_test ./gup_test -u # pin_user_pages_fast() benchmark -run_test ./gup_test -a +CATEGORY="gup_test" run_test ./gup_test -a # Dump pages 0, 19, and 4096, using pin_user_pages: -run_test ./gup_test -ct -F 0x1 0 19 0x1000 +CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 uffd_mods=("" ":dev") for mod in "${uffd_mods[@]}"; do - run_test ./userfaultfd anon${mod} 20 16 + CATEGORY="userfaultfd" run_test ./userfaultfd anon${mod} 20 16 # Hugetlb tests require source and destination huge pages. Pass in half # the size ($half_ufd_size_MB), which is used for *each*. - run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 - run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test - rm -f "$mnt"/uffd-test - run_test ./userfaultfd shmem${mod} 20 16 + CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 + CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 + CATEGORY="userfaultfd" run_test ./userfaultfd shmem${mod} 20 16 done #cleanup -umount "$mnt" -rm -rf "$mnt" echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages -run_test ./compaction_test +CATEGORY="compaction" run_test ./compaction_test -run_test sudo -u nobody ./on-fault-limit +CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit -run_test ./map_populate +CATEGORY="mmap" run_test ./map_populate -run_test ./mlock-random-test +CATEGORY="mlock" run_test ./mlock-random-test -run_test ./mlock2-tests +CATEGORY="mlock" run_test ./mlock2-tests -run_test ./mrelease_test +CATEGORY="process_mrelease" run_test ./mrelease_test -run_test ./mremap_test +CATEGORY="mremap" run_test ./mremap_test -run_test ./thuge-gen +CATEGORY="hugetlb" run_test ./thuge-gen if [ $VADDR64 -ne 0 ]; then - run_test ./virtual_address_range + CATEGORY="hugevm" run_test ./virtual_address_range # virtual address 128TB switch test - run_test ./va_128TBswitch.sh + CATEGORY="hugevm" run_test ./va_128TBswitch.sh fi # VADDR64 # vmalloc stability smoke test -run_test ./test_vmalloc.sh smoke +CATEGORY="vmalloc" run_test ./test_vmalloc.sh smoke -run_test ./mremap_dontunmap +CATEGORY="mremap" run_test ./mremap_dontunmap -run_test ./test_hmm.sh smoke +CATEGORY="hmm" run_test ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests -run_test ./madv_populate +CATEGORY="madv_populate" run_test ./madv_populate -run_test ./memfd_secret +CATEGORY="memfd_secret" run_test ./memfd_secret # KSM MADV_MERGEABLE test with 10 identical pages -run_test ./ksm_tests -M -p 10 +CATEGORY="ksm" run_test ./ksm_tests -M -p 10 # KSM unmerge test -run_test ./ksm_tests -U +CATEGORY="ksm" run_test ./ksm_tests -U # KSM test with 10 zero pages and use_zero_pages = 0 -run_test ./ksm_tests -Z -p 10 -z 0 +CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 0 # KSM test with 10 zero pages and use_zero_pages = 1 -run_test ./ksm_tests -Z -p 10 -z 1 +CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 1 # KSM test with 2 NUMA nodes and merge_across_nodes = 1 -run_test ./ksm_tests -N -m 1 +CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 1 # KSM test with 2 NUMA nodes and merge_across_nodes = 0 -run_test ./ksm_tests -N -m 0 +CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 + +CATEGORY="ksm" run_test ./ksm_functional_tests + +run_test ./ksm_functional_tests # protection_keys tests if [ -x ./protection_keys_32 ] then - run_test ./protection_keys_32 + CATEGORY="pkey" run_test ./protection_keys_32 fi if [ -x ./protection_keys_64 ] then - run_test ./protection_keys_64 + CATEGORY="pkey" run_test ./protection_keys_64 fi -run_test ./soft-dirty +CATEGORY="soft_dirty" run_test ./soft-dirty + +# COW tests +CATEGORY="cow" run_test ./cow exit $exitcode diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 297f250c1d95..7f22844ed704 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -93,10 +93,8 @@ static volatile bool test_uffdio_zeropage_eexist = true; static bool test_uffdio_wp = true; /* Whether to test uffd minor faults */ static bool test_uffdio_minor = false; - static bool map_shared; -static int shm_fd; -static int huge_fd; +static int mem_fd; static unsigned long long *count_verify; static int uffd = -1; static int uffd_flags, finished, *pipefd; @@ -143,7 +141,7 @@ const char *examples = "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" "./userfaultfd hugetlb 256 50\n\n" "# Run the same hugetlb test but using shared file:\n" - "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n" + "./userfaultfd hugetlb_shared 256 50\n\n" "# 10MiB-~6GiB 999 bounces anonymous test, " "continue forever unless an error triggers\n" "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; @@ -260,35 +258,21 @@ static void hugetlb_release_pages(char *rel_area) static void hugetlb_allocate_area(void **alloc_area, bool is_src) { + off_t size = nr_pages * page_size; + off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; - if (!map_shared) - *alloc_area = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | - (is_src ? 0 : MAP_NORESERVE), - -1, - 0); - else - *alloc_area = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_SHARED | - (is_src ? 0 : MAP_NORESERVE), - huge_fd, - is_src ? 0 : nr_pages * page_size); + *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, + (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (is_src ? 0 : MAP_NORESERVE), + mem_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); if (map_shared) { - area_alias = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - huge_fd, - is_src ? 0 : nr_pages * page_size); + area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) err("mmap of hugetlb file alias failed"); } @@ -334,14 +318,14 @@ static void shmem_allocate_area(void **alloc_area, bool is_src) } *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, offset); + mem_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of memfd failed"); if (test_collapse && *alloc_area != p) err("mmap of memfd failed at %p", p); area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, offset); + mem_fd, offset); if (area_alias == MAP_FAILED) err("mmap of memfd alias failed"); if (test_collapse && area_alias != p_alias) @@ -1841,21 +1825,17 @@ int main(int argc, char **argv) } nr_pages = nr_pages_per_cpu * nr_cpus; - if (test_type == TEST_HUGETLB && map_shared) { - if (argc < 5) - usage(); - huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); - if (huge_fd < 0) - err("Open of %s failed", argv[4]); - if (ftruncate(huge_fd, 0)) - err("ftruncate %s to size 0 failed", argv[4]); - } else if (test_type == TEST_SHMEM) { - shm_fd = memfd_create(argv[0], 0); - if (shm_fd < 0) + if (test_type == TEST_SHMEM || test_type == TEST_HUGETLB) { + unsigned int memfd_flags = 0; + + if (test_type == TEST_HUGETLB) + memfd_flags = MFD_HUGETLB; + mem_fd = memfd_create(argv[0], memfd_flags); + if (mem_fd < 0) err("memfd_create"); - if (ftruncate(shm_fd, nr_pages * page_size * 2)) + if (ftruncate(mem_fd, nr_pages * page_size * 2)) err("ftruncate"); - if (fallocate(shm_fd, + if (fallocate(mem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, nr_pages * page_size * 2)) err("fallocate"); diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index f11f8adda521..40e795624ff3 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -28,6 +28,31 @@ bool pagemap_is_softdirty(int fd, char *start) return entry & 0x0080000000000000ull; } +bool pagemap_is_swapped(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + return entry & 0x4000000000000000ull; +} + +bool pagemap_is_populated(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* Present or swapped. */ + return entry & 0xc000000000000000ull; +} + +unsigned long pagemap_get_pfn(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* If present (63th bit), PFN is at bit 0 -- 54. */ + if (entry & 0x8000000000000000ull) + return entry & 0x007fffffffffffffull; + return -1ul; +} + void clear_softdirty(void) { int ret; diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 5c35de454e08..1995ee911ef2 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -4,6 +4,9 @@ uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); +bool pagemap_is_swapped(int fd, char *start); +bool pagemap_is_populated(int fd, char *start); +unsigned long pagemap_get_pfn(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index f45e510500c0..bc71cbca0dde 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Watchdog Driver Test Program +* Watchdog Driver Test Program +* - Tests all ioctls +* - Tests Magic Close - CONFIG_WATCHDOG_NOWAYOUT +* - Could be tested against softdog driver on systems that +* don't have watchdog hardware. +* - TODO: +* - Enhance test to add coverage for WDIOC_GETTEMP. +* +* Reference: Documentation/watchdog/watchdog-api.rst */ #include <errno.h> @@ -19,13 +27,14 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:NLf:i"; +static const char sopts[] = "bdehp:st:Tn:NLf:i"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, {"enable", no_argument, NULL, 'e'}, {"help", no_argument, NULL, 'h'}, {"pingrate", required_argument, NULL, 'p'}, + {"status", no_argument, NULL, 's'}, {"timeout", required_argument, NULL, 't'}, {"gettimeout", no_argument, NULL, 'T'}, {"pretimeout", required_argument, NULL, 'n'}, @@ -74,6 +83,7 @@ static void usage(char *progname) printf(" -f, --file\t\tOpen watchdog device file\n"); printf("\t\t\tDefault is /dev/watchdog\n"); printf(" -i, --info\t\tShow watchdog_info\n"); + printf(" -s, --status\t\tGet status & supported features\n"); printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n"); printf(" -d, --disable\t\tTurn off the watchdog timer\n"); printf(" -e, --enable\t\tTurn on the watchdog timer\n"); @@ -91,6 +101,73 @@ static void usage(char *progname) printf("Example: %s -t 12 -T -n 7 -N\n", progname); } +struct wdiof_status { + int flag; + const char *status_str; +}; + +#define WDIOF_NUM_STATUS 8 + +static const struct wdiof_status wdiof_status[WDIOF_NUM_STATUS] = { + {WDIOF_SETTIMEOUT, "Set timeout (in seconds)"}, + {WDIOF_MAGICCLOSE, "Supports magic close char"}, + {WDIOF_PRETIMEOUT, "Pretimeout (in seconds), get/set"}, + {WDIOF_ALARMONLY, "Watchdog triggers a management or other external alarm not a reboot"}, + {WDIOF_KEEPALIVEPING, "Keep alive ping reply"}, + {WDIOS_DISABLECARD, "Turn off the watchdog timer"}, + {WDIOS_ENABLECARD, "Turn on the watchdog timer"}, + {WDIOS_TEMPPANIC, "Kernel panic on temperature trip"}, +}; + +static void print_status(int flags) +{ + int wdiof = 0; + + if (flags == WDIOS_UNKNOWN) { + printf("Unknown status error from WDIOC_GETSTATUS\n"); + return; + } + + for (wdiof = 0; wdiof < WDIOF_NUM_STATUS; wdiof++) { + if (flags & wdiof_status[wdiof].flag) + printf("Support/Status: %s\n", + wdiof_status[wdiof].status_str); + } +} + +#define WDIOF_NUM_BOOTSTATUS 7 + +static const struct wdiof_status wdiof_bootstatus[WDIOF_NUM_BOOTSTATUS] = { + {WDIOF_OVERHEAT, "Reset due to CPU overheat"}, + {WDIOF_FANFAULT, "Fan failed"}, + {WDIOF_EXTERN1, "External relay 1"}, + {WDIOF_EXTERN2, "External relay 2"}, + {WDIOF_POWERUNDER, "Power bad/power fault"}, + {WDIOF_CARDRESET, "Card previously reset the CPU"}, + {WDIOF_POWEROVER, "Power over voltage"}, +}; + +static void print_boot_status(int flags) +{ + int wdiof = 0; + + if (flags == WDIOF_UNKNOWN) { + printf("Unknown flag error from WDIOC_GETBOOTSTATUS\n"); + return; + } + + if (flags == 0) { + printf("Last boot is caused by: Power-On-Reset\n"); + return; + } + + for (wdiof = 0; wdiof < WDIOF_NUM_BOOTSTATUS; wdiof++) { + if (flags & wdiof_bootstatus[wdiof].flag) + printf("Last boot is caused by: %s\n", + wdiof_bootstatus[wdiof].status_str); + } +} + int main(int argc, char *argv[]) { int flags; @@ -100,6 +177,7 @@ int main(int argc, char *argv[]) int oneshot = 0; char *file = "/dev/watchdog"; struct watchdog_info info; + int temperature; setbuf(stdout, NULL); @@ -140,8 +218,7 @@ int main(int argc, char *argv[]) oneshot = 1; ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags); if (!ret) - printf("Last boot is caused by: %s.\n", (flags != 0) ? - "Watchdog" : "Power-On-Reset"); + print_boot_status(flags); else printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno)); break; @@ -171,6 +248,21 @@ int main(int argc, char *argv[]) ping_rate = DEFAULT_PING_RATE; printf("Watchdog ping rate set to %u seconds.\n", ping_rate); break; + case 's': + flags = 0; + oneshot = 1; + ret = ioctl(fd, WDIOC_GETSTATUS, &flags); + if (!ret) + print_status(flags); + else + printf("WDIOC_GETSTATUS error '%s'\n", strerror(errno)); + ret = ioctl(fd, WDIOC_GETTEMP, &temperature); + if (ret) + printf("WDIOC_GETTEMP: '%s'\n", strerror(errno)); + else + printf("Temperature %d\n", temperature); + + break; case 't': flags = strtoul(optarg, NULL, 0); ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); @@ -228,7 +320,7 @@ int main(int argc, char *argv[]) printf(" identity:\t\t%s\n", info.identity); printf(" firmware_version:\t%u\n", info.firmware_version); - printf(" options:\t\t%08x\n", info.options); + print_status(info.options); break; default: @@ -249,6 +341,10 @@ int main(int argc, char *argv[]) sleep(ping_rate); } end: + /* + * Send specific magic character 'V' just in case Magic Close is + * enabled to ensure watchdog gets disabled on close. + */ ret = write(fd, &v, 1); if (ret < 0) printf("Stopping watchdog ticks failed (%d)...\n", errno); diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index ce2a04717300..6327c9c400e0 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -64,8 +64,6 @@ CONFIG_PROC_FS=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y -CONFIG_RANDOM_TRUST_CPU=y -CONFIG_RANDOM_TRUST_BOOTLOADER=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 CONFIG_LOG_BUF_SHIFT=18 CONFIG_PRINTK_TIME=y diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index b8ec6c15bccb..4dee343909b1 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -903,7 +903,7 @@ out_err: return NULL; } -static void osnoise_usage(void) +static void osnoise_usage(int err) { int i; @@ -923,7 +923,7 @@ static void osnoise_usage(void) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + exit(err); } int osnoise_main(int argc, char *argv[]) @@ -941,8 +941,7 @@ int osnoise_main(int argc, char *argv[]) } if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { - osnoise_usage(); - exit(0); + osnoise_usage(0); } else if (strncmp(argv[1], "-", 1) == 0) { /* the user skipped the tool, call the default one */ osnoise_top_main(argc, argv); @@ -956,6 +955,6 @@ int osnoise_main(int argc, char *argv[]) } usage: - osnoise_usage(); + osnoise_usage(1); exit(1); } diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c index 09bd21b8af81..52e8f1825281 100644 --- a/tools/tracing/rtla/src/rtla.c +++ b/tools/tracing/rtla/src/rtla.c @@ -14,7 +14,7 @@ /* * rtla_usage - print rtla usage */ -static void rtla_usage(void) +static void rtla_usage(int err) { int i; @@ -33,7 +33,7 @@ static void rtla_usage(void) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + exit(err); } /* @@ -70,11 +70,9 @@ int main(int argc, char *argv[]) goto usage; if (strcmp(argv[1], "-h") == 0) { - rtla_usage(); - exit(0); + rtla_usage(0); } else if (strcmp(argv[1], "--help") == 0) { - rtla_usage(); - exit(0); + rtla_usage(0); } retval = run_command(argc, argv, 1); @@ -82,6 +80,6 @@ int main(int argc, char *argv[]) exit(0); usage: - rtla_usage(); + rtla_usage(1); exit(1); } diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c index 97abbf494fee..21cdcc5c4a29 100644 --- a/tools/tracing/rtla/src/timerlat.c +++ b/tools/tracing/rtla/src/timerlat.c @@ -14,7 +14,7 @@ #include "timerlat.h" -static void timerlat_usage(void) +static void timerlat_usage(int err) { int i; @@ -34,7 +34,7 @@ static void timerlat_usage(void) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + exit(err); } int timerlat_main(int argc, char *argv[]) @@ -52,8 +52,7 @@ int timerlat_main(int argc, char *argv[]) } if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { - timerlat_usage(); - exit(0); + timerlat_usage(0); } else if (strncmp(argv[1], "-", 1) == 0) { /* the user skipped the tool, call the default one */ timerlat_top_main(argc, argv); @@ -67,6 +66,6 @@ int timerlat_main(int argc, char *argv[]) } usage: - timerlat_usage(); + timerlat_usage(1); exit(1); } diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile new file mode 100644 index 000000000000..3d0f3888a58c --- /dev/null +++ b/tools/verification/rv/Makefile @@ -0,0 +1,141 @@ +NAME := rv +# Follow the kernel version +VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion | grep -v make) + +# From libtracefs: +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) + +INSTALL = install +MKDIR = mkdir +FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ + -fasynchronous-unwind-tables -fstack-clash-protection +WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + +TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + +CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -I include +LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LIBS := $$($(PKG_CONFIG) --libs libtracefs) + +SRC := $(wildcard src/*.c) +HDR := $(wildcard src/*.h) +OBJ := $(SRC:.c=.o) +DIRS := src +FILES := Makefile README.txt +CEXT := bz2 +TARBALL := $(NAME)-$(VERSION).tar.$(CEXT) +TAROPTS := -cvjf $(TARBALL) +BINDIR := /usr/bin +DATADIR := /usr/share +DOCDIR := $(DATADIR)/doc +MANDIR := $(DATADIR)/man +LICDIR := $(DATADIR)/licenses +SRCTREE := $(or $(BUILD_SRC),$(CURDIR)) + +# If running from the tarball, man pages are stored in the Documentation +# dir. If running from the kernel source, man pages are stored in +# Documentation/tools/rv/. +ifneq ($(wildcard Documentation/.*),) +DOCSRC = Documentation/ +else +DOCSRC = $(SRCTREE)/../../../Documentation/tools/rv/ +endif + +LIBTRACEEVENT_MIN_VERSION = 1.5 +LIBTRACEFS_MIN_VERSION = 1.3 + +.PHONY: all warnings show_warnings +all: warnings rv + +TEST_LIBTRACEEVENT = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) libtraceevent > /dev/null 2>&1 || echo n") +ifeq ("$(TEST_LIBTRACEEVENT)", "n") +WARNINGS = show_warnings +MISSING_LIBS += echo "** libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher"; +MISSING_PACKAGES += "libtraceevent-devel" +MISSING_SOURCE += echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ "; +endif + +TEST_LIBTRACEFS = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) libtracefs > /dev/null 2>&1 || echo n") +ifeq ("$(TEST_LIBTRACEFS)", "n") +WARNINGS = show_warnings +MISSING_LIBS += echo "** libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher"; +MISSING_PACKAGES += "libtracefs-devel" +MISSING_SOURCE += echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ "; +endif + +define show_dependencies + @echo "********************************************"; \ + echo "** NOTICE: Failed build dependencies"; \ + echo "**"; \ + echo "** Required Libraries:"; \ + $(MISSING_LIBS) \ + echo "**"; \ + echo "** Consider installing the latest libtracefs from your"; \ + echo "** distribution, e.g., 'dnf install $(MISSING_PACKAGES)' on Fedora,"; \ + echo "** or from source:"; \ + echo "**"; \ + $(MISSING_SOURCE) \ + echo "**"; \ + echo "********************************************" +endef + +show_warnings: + $(call show_dependencies); + +ifneq ("$(WARNINGS)", "") +ERROR_OUT = $(error Please add the necessary dependencies) + +warnings: $(WARNINGS) + $(ERROR_OUT) +endif + +rv: $(OBJ) + $(CC) -o rv $(LDFLAGS) $(OBJ) $(LIBS) + +.PHONY: install +install: doc_install + $(MKDIR) -p $(DESTDIR)$(BINDIR) + $(INSTALL) rv -m 755 $(DESTDIR)$(BINDIR) + $(STRIP) $(DESTDIR)$(BINDIR)/rv + +.PHONY: clean tarball +clean: doc_clean + @test ! -f rv || rm rv + @test ! -f $(TARBALL) || rm -f $(TARBALL) + @rm -rf *~ $(OBJ) *.tar.$(CEXT) + +tarball: clean + rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION) + echo $(VERSION) > $(NAME)-$(VERSION)/VERSION + cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION)/Documentation/ + cp -rp $(SRCTREE)/../../../Documentation/tools/rv/* $(NAME)-$(VERSION)/Documentation/ + tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION) + rm -rf $(NAME)-$(VERSION) + +.PHONY: doc doc_clean doc_install +doc: + $(MAKE) -C $(DOCSRC) + +doc_clean: + $(MAKE) -C $(DOCSRC) clean + +doc_install: + $(MAKE) -C $(DOCSRC) install diff --git a/tools/verification/rv/README.txt b/tools/verification/rv/README.txt new file mode 100644 index 000000000000..e96be0dfff59 --- /dev/null +++ b/tools/verification/rv/README.txt @@ -0,0 +1,38 @@ +RV: Runtime Verification + +Runtime Verification (RV) is a lightweight (yet rigorous) method that +complements classical exhaustive verification techniques (such as model +checking and theorem proving) with a more practical approach for +complex systems. + +The rv tool is the interface for a collection of monitors that aim +analysing the logical and timing behavior of Linux. + +Installing RV + +RV depends on the following libraries and tools: + + - libtracefs + - libtraceevent + +It also depends on python3-docutils to compile man pages. + +For development, we suggest the following steps for compiling rtla: + + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + $ cd libtraceevent/ + $ make + $ sudo make install + $ cd .. + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + $ cd libtracefs/ + $ make + $ sudo make install + $ cd .. + $ cd $rv_src + $ make + $ sudo make install + +For further information, please see rv manpage and the kernel documentation: + Runtime Verification: + Documentation/trace/rv/runtime-verification.rst diff --git a/tools/verification/rv/include/in_kernel.h b/tools/verification/rv/include/in_kernel.h new file mode 100644 index 000000000000..3090638c8d71 --- /dev/null +++ b/tools/verification/rv/include/in_kernel.h @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +int ikm_list_monitors(void); +int ikm_run_monitor(char *monitor, int argc, char **argv); diff --git a/tools/verification/rv/include/rv.h b/tools/verification/rv/include/rv.h new file mode 100644 index 000000000000..770fd6da3610 --- /dev/null +++ b/tools/verification/rv/include/rv.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define MAX_DESCRIPTION 1024 +#define MAX_DA_NAME_LEN 24 + +struct monitor { + char name[MAX_DA_NAME_LEN]; + char desc[MAX_DESCRIPTION]; + int enabled; +}; + +int should_stop(void); diff --git a/tools/verification/rv/include/trace.h b/tools/verification/rv/include/trace.h new file mode 100644 index 000000000000..8d89e8c303fa --- /dev/null +++ b/tools/verification/rv/include/trace.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <tracefs.h> + +struct trace_instance { + struct tracefs_instance *inst; + struct tep_handle *tep; + struct trace_seq *seq; +}; + +int trace_instance_init(struct trace_instance *trace, char *name); +int trace_instance_start(struct trace_instance *trace); +void trace_instance_destroy(struct trace_instance *trace); + +int collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context); diff --git a/tools/verification/rv/include/utils.h b/tools/verification/rv/include/utils.h new file mode 100644 index 000000000000..f24ae8282bd2 --- /dev/null +++ b/tools/verification/rv/include/utils.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define MAX_PATH 1024 + +void debug_msg(const char *fmt, ...); +void err_msg(const char *fmt, ...); + +extern int config_debug; diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c new file mode 100644 index 000000000000..50848d79b38b --- /dev/null +++ b/tools/verification/rv/src/in_kernel.c @@ -0,0 +1,698 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * in kernel monitor support: allows rv to control in-kernel monitors. + * + * Copyright (C) 2022 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> + +#include <trace.h> +#include <utils.h> +#include <rv.h> + +static int config_has_id; +static int config_my_pid; +static int config_trace; + +static char *config_initial_reactor; +static char *config_reactor; + +/* + * __ikm_read_enable - reads monitor's enable status + * + * __does not log errors. + * + * Returns the current status, or -1 if the monitor does not exist, + * __hence not logging errors. + */ +static int __ikm_read_enable(char *monitor_name) +{ + char path[MAX_PATH]; + long long enabled; + int retval; + + snprintf(path, MAX_PATH, "rv/monitors/%s/enable", monitor_name); + + retval = tracefs_instance_file_read_number(NULL, path, &enabled); + if (retval < 0) + return -1; + + return enabled; +} + +/* + * ikm_read_enable - reads monitor's enable status + * + * Returns the current status, or -1 on error. + */ +static int ikm_read_enable(char *monitor_name) +{ + int enabled; + + enabled = __ikm_read_enable(monitor_name); + if (enabled < 0) { + err_msg("ikm: fail read enabled: %d\n", enabled); + return -1; + } + + debug_msg("ikm: read enabled: %d\n", enabled); + + return enabled; +} + +/* + * ikm_write_enable - write to the monitor's enable file + * + * Return the number of bytes written, -1 on error. + */ +static int ikm_write_enable(char *monitor_name, char *enable_disable) +{ + char path[MAX_PATH]; + int retval; + + debug_msg("ikm: writing enabled: %s\n", enable_disable); + + snprintf(path, MAX_PATH, "rv/monitors/%s/enable", monitor_name); + retval = tracefs_instance_file_write(NULL, path, enable_disable); + if (retval < strlen(enable_disable)) { + err_msg("ikm: writing enabled: %s\n", enable_disable); + return -1; + } + + return retval; +} + +/* + * ikm_enable - enable a monitor + * + * Returns -1 on failure. Success otherwise. + */ +static int ikm_enable(char *monitor_name) +{ + return ikm_write_enable(monitor_name, "1"); +} + +/* + * ikm_disable - disable a monitor + * + * Returns -1 on failure. Success otherwise. + */ +static int ikm_disable(char *monitor_name) +{ + return ikm_write_enable(monitor_name, "0"); +} + +/* + * ikm_read_desc - read monitors' description + * + * Return a dynamically allocated string with the monitor's + * description, NULL otherwise. + */ +static char *ikm_read_desc(char *monitor_name) +{ + char path[MAX_PATH]; + char *desc; + + snprintf(path, MAX_PATH, "rv/monitors/%s/desc", monitor_name); + desc = tracefs_instance_file_read(NULL, path, NULL); + if (!desc) { + err_msg("ikm: error reading monitor %s desc\n", monitor_name); + return NULL; + } + + *strstr(desc, "\n") = '\0'; + + return desc; +} + +/* + * ikm_fill_monitor_definition - fill monitor's definition + * + * Returns -1 on error, 0 otherwise. + */ +static int ikm_fill_monitor_definition(char *name, struct monitor *ikm) +{ + int enabled; + char *desc; + + enabled = ikm_read_enable(name); + if (enabled < 0) { + err_msg("ikm: monitor %s fail to read enable file, bug?\n", name); + return -1; + } + + desc = ikm_read_desc(name); + if (!desc) { + err_msg("ikm: monitor %s does not have desc file, bug?\n", name); + return -1; + } + + strncpy(ikm->name, name, MAX_DA_NAME_LEN); + ikm->enabled = enabled; + strncpy(ikm->desc, desc, MAX_DESCRIPTION); + + free(desc); + + return 0; +} + +/* + * ikm_write_reactor - switch the reactor to *reactor + * + * Return the number or characters written, -1 on error. + */ +static int ikm_write_reactor(char *monitor_name, char *reactor) +{ + char path[MAX_PATH]; + int retval; + + snprintf(path, MAX_PATH, "rv/monitors/%s/reactors", monitor_name); + retval = tracefs_instance_file_write(NULL, path, reactor); + debug_msg("ikm: write \"%s\" reactors: %d\n", reactor, retval); + + return retval; +} + +/* + * ikm_read_reactor - read the reactors file + * + * Returns a dynamically allocated string with monitor's + * available reactors, or NULL on error. + */ +static char *ikm_read_reactor(char *monitor_name) +{ + char path[MAX_PATH]; + char *reactors; + + snprintf(path, MAX_PATH, "rv/monitors/%s/reactors", monitor_name); + reactors = tracefs_instance_file_read(NULL, path, NULL); + if (!reactors) { + err_msg("ikm: fail reading monitor's %s reactors file\n", monitor_name); + return NULL; + } + + return reactors; +} + +/* + * ikm_get_current_reactor - get the current enabled reactor + * + * Reads the reactors file and find the currently enabled + * [reactor]. + * + * Returns a dynamically allocated memory with the current + * reactor. NULL otherwise. + */ +static char *ikm_get_current_reactor(char *monitor_name) +{ + char *reactors = ikm_read_reactor(monitor_name); + char *start; + char *end; + char *curr_reactor; + + if (!reactors) + return NULL; + + start = strstr(reactors, "["); + if (!start) + goto out_free; + + start++; + + end = strstr(start, "]"); + if (!end) + goto out_free; + + *end = '\0'; + + curr_reactor = calloc(strlen(start) + 1, sizeof(char)); + if (!curr_reactor) + goto out_free; + + strncpy(curr_reactor, start, strlen(start)); + debug_msg("ikm: read current reactor %s\n", curr_reactor); + +out_free: + free(reactors); + + return curr_reactor; +} + +static int ikm_has_id(char *monitor_name) +{ + char path[MAX_PATH]; + char *format; + int has_id; + + snprintf(path, MAX_PATH, "events/rv/event_%s/format", monitor_name); + format = tracefs_instance_file_read(NULL, path, NULL); + if (!format) { + err_msg("ikm: fail reading monitor's %s format event file\n", monitor_name); + return -1; + } + + /* print fmt: "%d: %s x %s -> %s %s", REC->id, ... */ + has_id = !!strstr(format, "REC->id"); + + debug_msg("ikm: monitor %s has id: %s\n", monitor_name, has_id ? "yes" : "no"); + + free(format); + + return has_id; +} + +/** + * ikm_list_monitors - list all available monitors + * + * Returns 0 on success, -1 otherwise. + */ +int ikm_list_monitors(void) +{ + char *available_monitors; + struct monitor ikm; + char *curr, *next; + int retval; + + available_monitors = tracefs_instance_file_read(NULL, "rv/available_monitors", NULL); + + if (!available_monitors) { + err_msg("ikm: available monitors is not available, is CONFIG_RV enabled?\n"); + return -1; + } + + curr = available_monitors; + do { + next = strstr(curr, "\n"); + *next = '\0'; + + retval = ikm_fill_monitor_definition(curr, &ikm); + if (retval) + err_msg("ikm: error reading %d in kernel monitor, skipping\n", curr); + + printf("%-24s %s %s\n", ikm.name, ikm.desc, ikm.enabled ? "[ON]" : "[OFF]"); + curr = ++next; + + } while (strlen(curr)); + + free(available_monitors); + + return 0; +} + +static void ikm_print_header(struct trace_seq *s) +{ + trace_seq_printf(s, "%16s-%-8s %5s %5s ", "<TASK>", "PID", "[CPU]", "TYPE"); + if (config_has_id) + trace_seq_printf(s, "%8s ", "ID"); + + trace_seq_printf(s, "%24s x %-24s -> %-24s %s\n", + "STATE", + "EVENT", + "NEXT_STATE", + "FINAL"); + + trace_seq_printf(s, "%16s %-8s %5s %5s ", " | ", " | ", " | ", " | "); + + if (config_has_id) + trace_seq_printf(s, "%8s ", " | "); + + trace_seq_printf(s, "%24s %-24s %-24s %s\n", + " | ", + " | ", + " | ", + "|"); + +} + +/* + * ikm_event_handler - callback to handle event events + * + * Called any time a rv:"monitor"_event events is generated. + * It parses and print event. + */ +static int +ikm_event_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *trace_event, void *context) +{ + /* if needed: struct trace_instance *inst = context; */ + char *state, *event, *next_state; + unsigned long long final_state; + unsigned long long pid; + unsigned long long id; + int cpu = record->cpu; + int val; + + if (config_has_id) + tep_get_field_val(s, trace_event, "id", record, &id, 1); + + tep_get_common_field_val(s, trace_event, "common_pid", record, &pid, 1); + + if (config_has_id && (config_my_pid == id)) + return 0; + else if (config_my_pid && (config_my_pid == pid)) + return 0; + + tep_print_event(trace_event->tep, s, record, "%16s-%-8d ", TEP_PRINT_COMM, TEP_PRINT_PID); + + trace_seq_printf(s, "[%.3d] event ", cpu); + + if (config_has_id) + trace_seq_printf(s, "%8llu ", id); + + state = tep_get_field_raw(s, trace_event, "state", record, &val, 0); + event = tep_get_field_raw(s, trace_event, "event", record, &val, 0); + next_state = tep_get_field_raw(s, trace_event, "next_state", record, &val, 0); + tep_get_field_val(s, trace_event, "final_state", record, &final_state, 1); + + trace_seq_printf(s, "%24s x %-24s -> %-24s %s\n", + state, + event, + next_state, + final_state ? "Y" : "N"); + + trace_seq_do_printf(s); + trace_seq_reset(s); + + return 0; +} + +/* + * ikm_error_handler - callback to handle error events + * + * Called any time a rv:"monitor"_errors events is generated. + * It parses and print event. + */ +static int +ikm_error_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *trace_event, void *context) +{ + unsigned long long pid, id; + int cpu = record->cpu; + char *state, *event; + int val; + + if (config_has_id) + tep_get_field_val(s, trace_event, "id", record, &id, 1); + + tep_get_common_field_val(s, trace_event, "common_pid", record, &pid, 1); + + if (config_has_id && config_my_pid == id) + return 0; + else if (config_my_pid == pid) + return 0; + + trace_seq_printf(s, "%8lld [%03d] error ", pid, cpu); + + if (config_has_id) + trace_seq_printf(s, "%8llu ", id); + + state = tep_get_field_raw(s, trace_event, "state", record, &val, 0); + event = tep_get_field_raw(s, trace_event, "event", record, &val, 0); + + trace_seq_printf(s, "%24s x %s\n", state, event); + + trace_seq_do_printf(s); + trace_seq_reset(s); + + return 0; +} + +/* + * ikm_setup_trace_instance - set up a tracing instance to collect data + * + * Create a trace instance, enable rv: events and enable the trace. + * + * Returns the trace_instance * with all set, NULL otherwise. + */ +static struct trace_instance *ikm_setup_trace_instance(char *monitor_name) +{ + char event[MAX_DA_NAME_LEN + 7]; /* max(error_,event_) + '0' = 7 */ + struct trace_instance *inst; + int retval; + + if (!config_trace) + return NULL; + + config_has_id = ikm_has_id(monitor_name); + if (config_has_id < 0) { + err_msg("ikm: failed to read monitor %s event format\n", monitor_name); + goto out_err; + } + + /* alloc data */ + inst = calloc(1, sizeof(*inst)); + if (!inst) { + err_msg("ikm: failed to allocate trace instance"); + goto out_err; + } + + retval = trace_instance_init(inst, monitor_name); + if (retval) + goto out_free; + + /* enable events */ + snprintf(event, sizeof(event), "event_%s", monitor_name); + retval = tracefs_event_enable(inst->inst, "rv", event); + if (retval) + goto out_inst; + + tep_register_event_handler(inst->tep, -1, "rv", event, + ikm_event_handler, NULL); + + snprintf(event, sizeof(event), "error_%s", monitor_name); + retval = tracefs_event_enable(inst->inst, "rv", event); + if (retval) + goto out_inst; + + tep_register_event_handler(inst->tep, -1, "rv", event, + ikm_error_handler, NULL); + + /* ready to enable */ + tracefs_trace_on(inst->inst); + + return inst; + +out_inst: + trace_instance_destroy(inst); +out_free: + free(inst); +out_err: + return NULL; +} + +/** + * ikm_destroy_trace_instance - destroy a previously created instance + */ +static void ikm_destroy_trace_instance(struct trace_instance *inst) +{ + if (!inst) + return; + + trace_instance_destroy(inst); + free(inst); +} + +/* + * ikm_usage_print_reactors - print all available reactors, one per line. + */ +static void ikm_usage_print_reactors(void) +{ + char *reactors = tracefs_instance_file_read(NULL, "rv/available_reactors", NULL); + char *start, *end; + + if (!reactors) + return; + + fprintf(stderr, " available reactors:"); + + start = reactors; + end = strstr(start, "\n"); + + while (end) { + *end = '\0'; + + fprintf(stderr, " %s", start); + + start = ++end; + end = strstr(start, "\n"); + }; + + fprintf(stderr, "\n"); +} +/* + * ikm_usage - print usage + */ +static void ikm_usage(int exit_val, char *monitor_name, const char *fmt, ...) +{ + + char message[1024]; + va_list ap; + int i; + + static const char *const usage[] = { + "", + " -h/--help: print this menu and the reactor list", + " -r/--reactor 'reactor': enables the 'reactor'", + " -s/--self: when tracing (-t), also trace rv command", + " -t/--trace: trace monitor's event", + " -v/--verbose: print debug messages", + "", + NULL, + }; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, " %s\n", message); + + fprintf(stderr, "\n usage: rv mon %s [-h] [-q] [-r reactor] [-s] [-v]", monitor_name); + + for (i = 0; usage[i]; i++) + fprintf(stderr, "%s\n", usage[i]); + + ikm_usage_print_reactors(); + exit(exit_val); +} + +/* + * parse_arguments - parse arguments and set config + */ +static int parse_arguments(char *monitor_name, int argc, char **argv) +{ + int c, retval; + + config_my_pid = getpid(); + + while (1) { + static struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {"reactor", required_argument, 0, 'r'}, + {"self", no_argument, 0, 's'}, + {"trace", no_argument, 0, 't'}, + {"verbose", no_argument, 0, 'v'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "hr:stv", long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'h': + ikm_usage(0, monitor_name, "help:"); + break; + case 'r': + config_reactor = optarg; + break; + case 's': + config_my_pid = 0; + break; + case 't': + config_trace = 1; + break; + case 'v': + config_debug = 1; + break; + } + } + + if (config_reactor) { + config_initial_reactor = ikm_get_current_reactor(monitor_name); + if (!config_initial_reactor) + ikm_usage(1, monitor_name, + "ikm: failed to read current reactor, are reactors enabled?"); + + retval = ikm_write_reactor(monitor_name, config_reactor); + if (retval <= 0) + ikm_usage(1, monitor_name, + "ikm: failed to set %s reactor, is it available?", + config_reactor); + } + + debug_msg("ikm: my pid is %d\n", config_my_pid); + + return 0; +} + +/** + * ikm_run_monitor - apply configs and run the monitor + * + * Returns 1 if a monitor was found an executed, 0 if no + * monitors were found, or -1 on error. + */ +int ikm_run_monitor(char *monitor_name, int argc, char **argv) +{ + struct trace_instance *inst = NULL; + int retval; + + /* + * Check if monitor exists by seeing it is enabled. + */ + retval = __ikm_read_enable(monitor_name); + if (retval < 0) + return 0; + + if (retval) { + err_msg("ikm: monitor %s (in-kernel) is already enabled\n", monitor_name); + return -1; + } + + /* we should be good to go */ + retval = parse_arguments(monitor_name, argc, argv); + if (retval) + ikm_usage(1, monitor_name, "ikm: failed parsing arguments"); + + if (config_trace) { + inst = ikm_setup_trace_instance(monitor_name); + if (!inst) + return -1; + } + + retval = ikm_enable(monitor_name); + if (retval < 0) + goto out_free_instance; + + if (config_trace) + ikm_print_header(inst->seq); + + while (!should_stop()) { + if (config_trace) { + retval = tracefs_iterate_raw_events(inst->tep, + inst->inst, + NULL, + 0, + collect_registered_events, + inst); + if (retval) { + err_msg("ikm: error reading trace buffer\n"); + break; + } + } + + sleep(1); + } + + ikm_disable(monitor_name); + ikm_destroy_trace_instance(inst); + + if (config_reactor && config_initial_reactor) + ikm_write_reactor(monitor_name, config_initial_reactor); + + return 1; + +out_free_instance: + ikm_destroy_trace_instance(inst); + if (config_reactor && config_initial_reactor) + ikm_write_reactor(monitor_name, config_initial_reactor); + return -1; +} diff --git a/tools/verification/rv/src/rv.c b/tools/verification/rv/src/rv.c new file mode 100644 index 000000000000..e601cd9c411e --- /dev/null +++ b/tools/verification/rv/src/rv.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rv tool, the interface for the Linux kernel RV subsystem and home of + * user-space controlled monitors. + * + * Copyright (C) 2022 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> + +#include <trace.h> +#include <utils.h> +#include <in_kernel.h> + +static int stop_session; + +/* + * stop_rv - tell monitors to stop + */ +static void stop_rv(int sig) +{ + stop_session = 1; +} + +/** + * should_stop - check if the monitor should stop. + * + * Returns 1 if the monitor should stop, 0 otherwise. + */ +int should_stop(void) +{ + return stop_session; +} + +/* + * rv_list - list all available monitors + */ +static void rv_list(int argc, char **argv) +{ + static const char *const usage[] = { + "", + " usage: rv list [-h]", + "", + " list all available monitors", + "", + " -h/--help: print this menu", + NULL, + }; + int i; + + if (argc > 1) { + fprintf(stderr, "rv version %s\n", VERSION); + + /* more than 1 is always usage */ + for (i = 0; usage[i]; i++) + fprintf(stderr, "%s\n", usage[i]); + + /* but only -h is valid */ + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) + exit(0); + else + exit(1); + } + + ikm_list_monitors(); + exit(0); +} + +/* + * rv_mon - try to run a monitor passed as argument + */ +static void rv_mon(int argc, char **argv) +{ + char *monitor_name; + int i, run; + + static const char *const usage[] = { + "", + " usage: rv mon [-h] monitor [monitor options]", + "", + " run a monitor", + "", + " -h/--help: print this menu", + "", + " monitor [monitor options]: the monitor, passing", + " the arguments to the [monitor options]", + NULL, + }; + + /* requires at least one argument */ + if (argc == 1) { + + fprintf(stderr, "rv version %s\n", VERSION); + + for (i = 0; usage[i]; i++) + fprintf(stderr, "%s\n", usage[i]); + exit(1); + } else if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { + + fprintf(stderr, "rv version %s\n", VERSION); + + for (i = 0; usage[i]; i++) + fprintf(stderr, "%s\n", usage[i]); + exit(0); + } + + monitor_name = argv[1]; + /* + * Call all possible monitor implementations, looking + * for the [monitor]. + */ + run += ikm_run_monitor(monitor_name, argc-1, &argv[1]); + + if (!run) + err_msg("rv: monitor %s does not exist\n", monitor_name); + exit(!run); +} + +static void usage(int exit_val, const char *fmt, ...) +{ + char message[1024]; + va_list ap; + int i; + + static const char *const usage[] = { + "", + " usage: rv command [-h] [command_options]", + "", + " -h/--help: print this menu", + "", + " command: run one of the following command:", + " list: list all available monitors", + " mon: run a monitor", + "", + " [command options]: each command has its own set of options", + " run rv command -h for further information", + NULL, + }; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "rv version %s: %s\n", VERSION, message); + + for (i = 0; usage[i]; i++) + fprintf(stderr, "%s\n", usage[i]); + + exit(exit_val); +} + +/* + * main - select which main sending the command + * + * main itself redirects the arguments to the sub-commands + * to handle the options. + * + * subcommands should exit. + */ +int main(int argc, char **argv) +{ + if (geteuid()) + usage(1, "%s needs root permission", argv[0]); + + if (argc <= 1) + usage(1, "%s requires a command", argv[0]); + + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) + usage(0, "help"); + + if (!strcmp(argv[1], "list")) + rv_list(--argc, &argv[1]); + + if (!strcmp(argv[1], "mon")) { + /* + * monitor's main should monitor should_stop() function. + * and exit. + */ + signal(SIGINT, stop_rv); + + rv_mon(argc - 1, &argv[1]); + } + + /* invalid sub-command */ + usage(1, "%s does not know the %s command, old version?", argv[0], argv[1]); +} diff --git a/tools/verification/rv/src/trace.c b/tools/verification/rv/src/trace.c new file mode 100644 index 000000000000..2c7deed47f8d --- /dev/null +++ b/tools/verification/rv/src/trace.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace helpers. + * + * Copyright (C) 2022 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <sys/sendfile.h> +#include <tracefs.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include <rv.h> +#include <trace.h> +#include <utils.h> + +/* + * create_instance - create a trace instance with *instance_name + */ +static struct tracefs_instance *create_instance(char *instance_name) +{ + return tracefs_instance_create(instance_name); +} + +/* + * destroy_instance - remove a trace instance and free the data + */ +static void destroy_instance(struct tracefs_instance *inst) +{ + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +/** + * collect_registered_events - call the existing callback function for the event + * + * If an event has a registered callback function, call it. + * Otherwise, ignore the event. + * + * Returns 0 if the event was collected, 1 if the tool should stop collecting trace. + */ +int +collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + struct trace_seq *s = trace->seq; + + if (should_stop()) + return 1; + + if (!event->handler) + return 0; + + event->handler(s, record, event, context); + + return 0; +} + +/** + * trace_instance_destroy - destroy and free a rv trace instance + */ +void trace_instance_destroy(struct trace_instance *trace) +{ + if (trace->inst) { + destroy_instance(trace->inst); + trace->inst = NULL; + } + + if (trace->seq) { + free(trace->seq); + trace->seq = NULL; + } + + if (trace->tep) { + tep_free(trace->tep); + trace->tep = NULL; + } +} + +/** + * trace_instance_init - create an trace instance + * + * It is more than the tracefs instance, as it contains other + * things required for the tracing, such as the local events and + * a seq file. + * + * Note that the trace instance is returned disabled. This allows + * the tool to apply some other configs, like setting priority + * to the kernel threads, before starting generating trace entries. + * + * Returns 0 on success, non-zero otherwise. + */ +int trace_instance_init(struct trace_instance *trace, char *name) +{ + trace->seq = calloc(1, sizeof(*trace->seq)); + if (!trace->seq) + goto out_err; + + trace_seq_init(trace->seq); + + trace->inst = create_instance(name); + if (!trace->inst) + goto out_err; + + trace->tep = tracefs_local_events(NULL); + if (!trace->tep) + goto out_err; + + /* + * Let the main enable the record after setting some other + * things such as the priority of the tracer's threads. + */ + tracefs_trace_off(trace->inst); + + return 0; + +out_err: + trace_instance_destroy(trace); + return 1; +} + +/** + * trace_instance_start - start tracing a given rv instance + * + * Returns 0 on success, -1 otherwise. + */ +int trace_instance_start(struct trace_instance *trace) +{ + return tracefs_trace_on(trace->inst); +} diff --git a/tools/verification/rv/src/utils.c b/tools/verification/rv/src/utils.c new file mode 100644 index 000000000000..5677b439dc2f --- /dev/null +++ b/tools/verification/rv/src/utils.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * util functions. + * + * Copyright (C) 2022 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <stdarg.h> +#include <stdio.h> +#include <utils.h> + +int config_debug; + +#define MAX_MSG_LENGTH 1024 + +/** + * err_msg - print an error message to the stderr + */ +void err_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/** + * debug_msg - print a debug message to stderr if debug is set + */ +void debug_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + if (!config_debug) + return; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore index 79bb92ae1bb3..922879f93fc8 100644 --- a/tools/vm/.gitignore +++ b/tools/vm/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only slabinfo page-types +page_owner_sort diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 0fffaeedee76..cfaeaea71042 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -157,9 +157,11 @@ static unsigned long read_obj(const char *name) { FILE *f = fopen(name, "r"); - if (!f) + if (!f) { buffer[0] = 0; - else { + if (errno == EACCES) + fatal("%s, Try using superuser\n", strerror(errno)); + } else { if (!fgets(buffer, sizeof(buffer), f)) buffer[0] = 0; fclose(f); |