diff options
author | Johannes Berg <johannes.berg@intel.com> | 2019-02-22 13:48:13 +0100 |
---|---|---|
committer | Johannes Berg <johannes.berg@intel.com> | 2019-02-22 13:48:13 +0100 |
commit | b7b14ec1ebef35d22f3f4087816468f22c987f75 (patch) | |
tree | 3f99f4d7b770d7bba3ee84663b32f98dfbe7582d /tools | |
parent | mac80211: update HE IEs to D3.3 (diff) | |
parent | rocker: Add missing break for PRE_BRIDGE_FLAGS (diff) | |
download | linux-b7b14ec1ebef35d22f3f4087816468f22c987f75.tar.xz linux-b7b14ec1ebef35d22f3f4087816468f22c987f75.zip |
Merge remote-tracking branch 'net-next/master' into mac80211-next
Merge net-next to resolve a conflict and to get the mac80211
rhashtable fixes so further patches can be applied on top.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Diffstat (limited to 'tools')
134 files changed, 6051 insertions, 628 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index d43fce568ef7..9bb9ace54ba8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -17,8 +17,8 @@ SYNOPSIS *COMMANDS* := { **show** | **list** | **tree** | **attach** | **detach** | **help** } -MAP COMMANDS -============= +CGROUP COMMANDS +=============== | **bpftool** **cgroup { show | list }** *CGROUP* | **bpftool** **cgroup tree** [*CGROUP_ROOT*] diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 8d489a26e3c9..82de03dd8f52 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -16,8 +16,8 @@ SYNOPSIS *COMMANDS* := { **probe** | **help** } -MAP COMMANDS -============= +FEATURE COMMANDS +================ | **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]] | **bpftool** **feature help** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 13b56102f528..7e59495cb028 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -18,7 +18,7 @@ SYNOPSIS { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **loadall** | **help** } -MAP COMMANDS +PROG COMMANDS ============= | **bpftool** **prog { show | list }** [*PROG*] diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 27153bb816ac..4f2188845dd8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,7 +16,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** } + *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } @@ -34,6 +34,8 @@ SYNOPSIS *NET-COMMANDS* := { **show** | **list** | **help** } + *FEATURE-COMMANDS* := { **probe** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 897483457bf0..f7261fad45c1 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -297,10 +297,8 @@ char *get_fdinfo(int fd, const char *key) snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); fdi = fopen(path, "r"); - if (!fdi) { - p_err("can't open fdinfo: %s", strerror(errno)); + if (!fdi) return NULL; - } while ((n = getline(&line, &line_n, fdi)) > 0) { char *value; @@ -313,7 +311,6 @@ char *get_fdinfo(int fd, const char *key) value = strchr(line, '\t'); if (!value || !value[1]) { - p_err("malformed fdinfo!?"); free(line); return NULL; } @@ -326,7 +323,6 @@ char *get_fdinfo(int fd, const char *key) return line; } - p_err("key '%s' not found in fdinfo", key); free(line); fclose(fdi); return NULL; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 2160a8ef17e5..e0c650d91784 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -358,6 +358,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, return argv + i; } +/* on per cpu maps we must copy the provided value on all value instances */ +static void fill_per_cpu_value(struct bpf_map_info *info, void *value) +{ + unsigned int i, n, step; + + if (!map_is_per_cpu(info->type)) + return; + + n = get_possible_cpus(); + step = round_up(info->value_size, 8); + for (i = 1; i < n; i++) + memcpy(value + i * step, value, info->value_size); +} + static int parse_elem(char **argv, struct bpf_map_info *info, void *key, void *value, __u32 key_size, __u32 value_size, __u32 *flags, __u32 **value_fd) @@ -440,6 +454,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info, argv = parse_bytes(argv, "value", value, value_size); if (!argv) return -1; + + fill_per_cpu_value(info, value); } return parse_elem(argv, info, key, NULL, key_size, value_size, @@ -511,10 +527,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); } - if (atoi(owner_jited)) - jsonw_bool_field(json_wtr, "owner_jited", true); - else - jsonw_bool_field(json_wtr, "owner_jited", false); + if (owner_jited) + jsonw_bool_field(json_wtr, "owner_jited", + !!atoi(owner_jited)); free(owner_prog_type); free(owner_jited); @@ -567,7 +582,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); char *owner_jited = get_fdinfo(fd, "owner_jited"); - printf("\n\t"); + if (owner_prog_type || owner_jited) + printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); @@ -577,10 +593,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) else printf("owner_prog_type %d ", prog_type); } - if (atoi(owner_jited)) - printf("owner jited"); - else - printf("owner not jited"); + if (owner_jited) + printf("owner%s jited", + atoi(owner_jited) ? "" : " not"); free(owner_prog_type); free(owner_jited); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 0640e9bc0ada..33ed0806ccc0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -78,13 +78,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) static int prog_fd_by_tag(unsigned char *tag) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); unsigned int id = 0; int err; int fd; while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + err = bpf_prog_get_next_id(id, &id); if (err) { p_err("%s", strerror(errno)); diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 3040830d7797..84545666a09c 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -330,7 +330,7 @@ static const struct option longopts[] = { int main(int argc, char **argv) { - unsigned long long num_loops = 2; + long long num_loops = 2; unsigned long timedelay = 1000000; unsigned long buf_len = 128; diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h index fd92ce8388fc..57aaeaf8e192 100644 --- a/tools/include/uapi/asm/bitsperlong.h +++ b/tools/include/uapi/asm/bitsperlong.h @@ -15,6 +15,8 @@ #include "../../arch/ia64/include/uapi/asm/bitsperlong.h" #elif defined(__riscv) #include "../../arch/riscv/include/uapi/asm/bitsperlong.h" +#elif defined(__alpha__) +#include "../../arch/alpha/include/uapi/asm/bitsperlong.h" #else #include <asm-generic/bitsperlong.h> #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1777fa0c61e4..bcdd2474eee7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2016,6 +2016,19 @@ union bpf_attr { * Only works if *skb* contains an IPv6 packet. Insert a * Segment Routing Header (**struct ipv6_sr_hdr**) inside * the IPv6 header. + * **BPF_LWT_ENCAP_IP** + * IP encapsulation (GRE/GUE/IPIP/etc). The outer header + * must be IPv4 or IPv6, followed by zero or more + * additional headers, up to LWT_BPF_MAX_HEADROOM total + * bytes in all prepended headers. Please note that + * if skb_is_gso(skb) is true, no more than two headers + * can be prepended, and the inner header, if present, + * should be either GRE or UDP/GUE. + * + * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of + * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called + * by bpf programs of types BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2329,6 +2342,23 @@ union bpf_attr { * "**y**". * Return * 0 + * + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_sock** pointer such + * that all the fields in bpf_sock can be accessed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. + * + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * + * Return + * A **struct bpf_tcp_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2425,7 +2455,9 @@ union bpf_attr { FN(msg_pop_data), \ FN(rc_pointer_rel), \ FN(spin_lock), \ - FN(spin_unlock), + FN(spin_unlock), \ + FN(sk_fullsock), \ + FN(tcp_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2498,7 +2530,8 @@ enum bpf_hdr_start_off { /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6, - BPF_LWT_ENCAP_SEG6_INLINE + BPF_LWT_ENCAP_SEG6_INLINE, + BPF_LWT_ENCAP_IP, }; #define __bpf_md_ptr(type, name) \ @@ -2545,6 +2578,7 @@ struct __sk_buff { __u64 tstamp; __u32 wire_len; __u32 gso_segs; + __bpf_md_ptr(struct bpf_sock *, sk); }; struct bpf_tunnel_key { @@ -2586,7 +2620,15 @@ enum bpf_ret_code { BPF_DROP = 2, /* 3-6 reserved */ BPF_REDIRECT = 7, - /* >127 are reserved for prog type specific return codes */ + /* >127 are reserved for prog type specific return codes. + * + * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been + * changed and should be routed based on its new L3 header. + * (This is an L3 redirect, as opposed to L2 redirect + * represented by BPF_REDIRECT above). + */ + BPF_LWT_REROUTE = 128, }; struct bpf_sock { @@ -2596,14 +2638,52 @@ struct bpf_sock { __u32 protocol; __u32 mark; __u32 priority; - __u32 src_ip4; /* Allows 1,2,4-byte read. - * Stored in network byte order. + /* IP address also allows 1 and 2 bytes access */ + __u32 src_ip4; + __u32 src_ip6[4]; + __u32 src_port; /* host byte order */ + __u32 dst_port; /* network byte order */ + __u32 dst_ip4; + __u32 dst_ip6[4]; + __u32 state; +}; + +struct bpf_tcp_sock { + __u32 snd_cwnd; /* Sending congestion window */ + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + __u32 rtt_min; + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 ecn_flags; /* ECN status bits. */ + __u32 rate_delivered; /* saved rate sample: packets delivered */ + __u32 rate_interval_us; /* saved rate sample: time elapsed */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 retrans_out; /* Retransmitted packets out */ + __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. */ - __u32 src_ip6[4]; /* Allows 1,2,4-byte read. - * Stored in network byte order. + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. */ - __u32 src_port; /* Allows 4-byte read. - * Stored in host byte order + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. */ }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d6533828123a..5b225ff63b48 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -925,6 +925,7 @@ enum { enum { LINK_XSTATS_TYPE_UNSPEC, LINK_XSTATS_TYPE_BRIDGE, + LINK_XSTATS_TYPE_BOND, __LINK_XSTATS_TYPE_MAX }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index f6052e70bf40..a55cb8b10165 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -268,7 +268,7 @@ struct sockaddr_in { #define IN_MULTICAST(a) IN_CLASSD(a) #define IN_MULTICAST_NET 0xe0000000 -#define IN_BADCLASS(a) ((((long int) (a) ) == 0xffffffff) +#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff) #define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) #define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h index 6e89a5df49a4..653c4f94f76e 100644 --- a/tools/include/uapi/linux/tc_act/tc_bpf.h +++ b/tools/include/uapi/linux/tc_act/tc_bpf.h @@ -13,8 +13,6 @@ #include <linux/pkt_cls.h> -#define TCA_ACT_BPF 13 - struct tc_act_bpf { tc_gen; }; diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3defad77dc7a..9cd015574e83 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -22,6 +22,7 @@ */ #include <stdlib.h> +#include <string.h> #include <memory.h> #include <unistd.h> #include <asm/unistd.h> @@ -214,23 +215,35 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, { void *finfo = NULL, *linfo = NULL; union bpf_attr attr; + __u32 log_level; __u32 name_len; int fd; - if (!load_attr) + if (!load_attr || !log_buf != !log_buf_sz) + return -EINVAL; + + log_level = load_attr->log_level; + if (log_level > 2 || (log_level && !log_buf)) return -EINVAL; name_len = load_attr->name ? strlen(load_attr->name) : 0; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; attr.insn_cnt = (__u32)load_attr->insns_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); - attr.log_buf = ptr_to_u64(NULL); - attr.log_size = 0; - attr.log_level = 0; + + attr.log_level = log_level; + if (log_level) { + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + } else { + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + } + attr.kern_version = load_attr->kern_version; attr.prog_ifindex = load_attr->prog_ifindex; attr.prog_btf_fd = load_attr->prog_btf_fd; @@ -286,7 +299,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, goto done; } - if (!log_buf || !log_buf_sz) + if (log_level || !log_buf) goto done; /* Try again with log */ @@ -327,7 +340,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; attr.insns = ptr_to_u64(insns); @@ -347,7 +360,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -360,7 +373,7 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -372,7 +385,7 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -385,7 +398,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -397,7 +410,7 @@ int bpf_map_delete_elem(int fd, const void *key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); @@ -408,7 +421,7 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); @@ -420,7 +433,7 @@ int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); attr.bpf_fd = fd; @@ -431,7 +444,7 @@ int bpf_obj_get(const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); @@ -442,7 +455,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -455,7 +468,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_type = type; @@ -466,7 +479,7 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -480,7 +493,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.query.target_fd = target_fd; attr.query.attach_type = type; attr.query.query_flags = query_flags; @@ -501,7 +514,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = prog_fd; attr.test.data_in = ptr_to_u64(data); attr.test.data_out = ptr_to_u64(data_out); @@ -526,7 +539,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) if (!test_attr->data_out && test_attr->data_size_out > 0) return -EINVAL; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = test_attr->prog_fd; attr.test.data_in = ptr_to_u64(test_attr->data_in); attr.test.data_out = ptr_to_u64(test_attr->data_out); @@ -546,7 +559,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); @@ -561,7 +574,7 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); @@ -575,7 +588,7 @@ int bpf_prog_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_id = id; return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -585,7 +598,7 @@ int bpf_map_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_id = id; return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -595,7 +608,7 @@ int bpf_btf_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.btf_id = id; return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -606,7 +619,7 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.info.bpf_fd = prog_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); @@ -622,7 +635,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index ed09eed2dc3b..6ffdd79bea89 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -85,6 +85,7 @@ struct bpf_load_program_attr { __u32 line_info_rec_size; const void *line_info; __u32 line_info_cnt; + __u32 log_level; }; /* Flags to direct loading requirements */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d682d3b8f7b9..68b50e9bbde1 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -9,12 +10,14 @@ #include <linux/btf.h> #include "btf.h" #include "bpf.h" +#include "libbpf.h" +#include "libbpf_util.h" -#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); } #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) -#define BTF_MAX_NR_TYPES 65535 +#define BTF_MAX_NR_TYPES 0x7fffffff +#define BTF_MAX_STR_OFFSET 0x7fffffff #define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \ ((k) == BTF_KIND_VOLATILE) || \ @@ -39,9 +42,8 @@ struct btf { struct btf_ext_info { /* - * info points to a deep copy of the individual info section - * (e.g. func_info and line_info) from the .BTF.ext. - * It does not include the __u32 rec_size. + * info points to the individual info section (e.g. func_info and + * line_info) from the .BTF.ext. It does not include the __u32 rec_size. */ void *info; __u32 rec_size; @@ -49,8 +51,13 @@ struct btf_ext_info { }; struct btf_ext { + union { + struct btf_ext_header *hdr; + void *data; + }; struct btf_ext_info func_info; struct btf_ext_info line_info; + __u32 data_size; }; struct btf_ext_info_sec { @@ -107,54 +114,54 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) return 0; } -static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_hdr(struct btf *btf) { const struct btf_header *hdr = btf->hdr; __u32 meta_left; if (btf->data_size < sizeof(struct btf_header)) { - elog("BTF header not found\n"); + pr_debug("BTF header not found\n"); return -EINVAL; } if (hdr->magic != BTF_MAGIC) { - elog("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic:%x\n", hdr->magic); return -EINVAL; } if (hdr->version != BTF_VERSION) { - elog("Unsupported BTF version:%u\n", hdr->version); + pr_debug("Unsupported BTF version:%u\n", hdr->version); return -ENOTSUP; } if (hdr->flags) { - elog("Unsupported BTF flags:%x\n", hdr->flags); + pr_debug("Unsupported BTF flags:%x\n", hdr->flags); return -ENOTSUP; } meta_left = btf->data_size - sizeof(*hdr); if (!meta_left) { - elog("BTF has no data\n"); + pr_debug("BTF has no data\n"); return -EINVAL; } if (meta_left < hdr->type_off) { - elog("Invalid BTF type section offset:%u\n", hdr->type_off); + pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); return -EINVAL; } if (meta_left < hdr->str_off) { - elog("Invalid BTF string section offset:%u\n", hdr->str_off); + pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); return -EINVAL; } if (hdr->type_off >= hdr->str_off) { - elog("BTF type section offset >= string section offset. No type?\n"); + pr_debug("BTF type section offset >= string section offset. No type?\n"); return -EINVAL; } if (hdr->type_off & 0x02) { - elog("BTF type section is not aligned to 4 bytes\n"); + pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } @@ -163,15 +170,15 @@ static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_str_sec(struct btf *btf) { const struct btf_header *hdr = btf->hdr; const char *start = btf->nohdr_data + hdr->str_off; const char *end = start + btf->hdr->str_len; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || start[0] || end[-1]) { - elog("Invalid BTF string section\n"); + pr_debug("Invalid BTF string section\n"); return -EINVAL; } @@ -180,7 +187,38 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_type_size(struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + default: + pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); + return -EINVAL; + } +} + +static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; void *nohdr_data = btf->nohdr_data; @@ -189,41 +227,13 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) while (next_type < end_type) { struct btf_type *t = next_type; - __u16 vlen = BTF_INFO_VLEN(t->info); + int type_size; int err; - next_type += sizeof(*t); - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - next_type += sizeof(int); - break; - case BTF_KIND_ARRAY: - next_type += sizeof(struct btf_array); - break; - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - next_type += vlen * sizeof(struct btf_member); - break; - case BTF_KIND_ENUM: - next_type += vlen * sizeof(struct btf_enum); - break; - case BTF_KIND_FUNC_PROTO: - next_type += vlen * sizeof(struct btf_param); - break; - case BTF_KIND_FUNC: - case BTF_KIND_TYPEDEF: - case BTF_KIND_PTR: - case BTF_KIND_FWD: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - break; - default: - elog("Unsupported BTF_KIND:%u\n", - BTF_INFO_KIND(t->info)); - return -EINVAL; - } - + type_size = btf_type_size(t); + if (type_size < 0) + return type_size; + next_type += type_size; err = btf_add_type(btf, t); if (err) return err; @@ -232,6 +242,11 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } +__u32 btf__get_nr_types(const struct btf *btf) +{ + return btf->nr_types; +} + const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { if (type_id > btf->nr_types) @@ -250,21 +265,6 @@ static bool btf_type_is_void_or_null(const struct btf_type *t) return !t || btf_type_is_void(t); } -static __s64 btf_type_size(const struct btf_type *t) -{ - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - return t->size; - case BTF_KIND_PTR: - return sizeof(void *); - default: - return -EINVAL; - } -} - #define MAX_RESOLVE_DEPTH 32 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) @@ -278,11 +278,16 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { - size = btf_type_size(t); - if (size >= 0) - break; - switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + size = t->size; + goto done; + case BTF_KIND_PTR: + size = sizeof(void *); + goto done; case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: @@ -306,6 +311,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) if (size < 0) return -EINVAL; +done: if (nelems && size > UINT32_MAX / nelems) return -E2BIG; @@ -363,10 +369,8 @@ void btf__free(struct btf *btf) free(btf); } -struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +struct btf *btf__new(__u8 *data, __u32 size) { - __u32 log_buf_size = 0; - char *log_buf = NULL; struct btf *btf; int err; @@ -376,16 +380,6 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) btf->fd = -1; - if (err_log) { - log_buf = malloc(BPF_LOG_BUF_SIZE); - if (!log_buf) { - err = -ENOMEM; - goto done; - } - *log_buf = 0; - log_buf_size = BPF_LOG_BUF_SIZE; - } - btf->data = malloc(size); if (!btf->data) { err = -ENOMEM; @@ -395,30 +389,17 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) memcpy(btf->data, data, size); btf->data_size = size; - btf->fd = bpf_load_btf(btf->data, btf->data_size, - log_buf, log_buf_size, false); - - if (btf->fd == -1) { - err = -errno; - elog("Error loading BTF: %s(%d)\n", strerror(errno), errno); - if (log_buf && *log_buf) - elog("%s\n", log_buf); - goto done; - } - - err = btf_parse_hdr(btf, err_log); + err = btf_parse_hdr(btf); if (err) goto done; - err = btf_parse_str_sec(btf, err_log); + err = btf_parse_str_sec(btf); if (err) goto done; - err = btf_parse_type_sec(btf, err_log); + err = btf_parse_type_sec(btf); done: - free(log_buf); - if (err) { btf__free(btf); return ERR_PTR(err); @@ -427,11 +408,47 @@ done: return btf; } +int btf__load(struct btf *btf) +{ + __u32 log_buf_size = BPF_LOG_BUF_SIZE; + char *log_buf = NULL; + int err = 0; + + if (btf->fd >= 0) + return -EEXIST; + + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + if (btf->fd < 0) { + err = -errno; + pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (*log_buf) + pr_warning("%s\n", log_buf); + goto done; + } + +done: + free(log_buf); + return err; +} + int btf__fd(const struct btf *btf) { return btf->fd; } +const void *btf__get_raw_data(const struct btf *btf, __u32 *size) +{ + *size = btf->data_size; + return btf->data; +} + const char *btf__name_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) @@ -467,7 +484,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } - bzero(ptr, last_size); + memset(ptr, 0, last_size); btf_info.btf = ptr_to_u64(ptr); err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); @@ -481,7 +498,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } ptr = temp_ptr; - bzero(ptr, last_size); + memset(ptr, 0, last_size); btf_info.btf = ptr_to_u64(ptr); err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); } @@ -491,7 +508,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } - *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size); if (IS_ERR(*btf)) { err = PTR_ERR(*btf); *btf = NULL; @@ -504,7 +521,79 @@ exit_free: return err; } -struct btf_ext_sec_copy_param { +int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id) +{ + const struct btf_type *container_type; + const struct btf_member *key, *value; + const size_t max_name = 256; + char container_name[max_name]; + __s64 key_size, value_size; + __s32 container_id; + + if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == + max_name) { + pr_warning("map:%s length of '____btf_map_%s' is too long\n", + map_name, map_name); + return -EINVAL; + } + + container_id = btf__find_by_name(btf, container_name); + if (container_id < 0) { + pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", + map_name, container_name); + return container_id; + } + + container_type = btf__type_by_id(btf, container_id); + if (!container_type) { + pr_warning("map:%s cannot find BTF type for container_id:%u\n", + map_name, container_id); + return -EINVAL; + } + + if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(container_type->info) < 2) { + pr_warning("map:%s container_name:%s is an invalid container struct\n", + map_name, container_name); + return -EINVAL; + } + + key = (struct btf_member *)(container_type + 1); + value = key + 1; + + key_size = btf__resolve_size(btf, key->type); + if (key_size < 0) { + pr_warning("map:%s invalid BTF key_type_size\n", map_name); + return key_size; + } + + if (expected_key_size != key_size) { + pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map_name, (__u32)key_size, expected_key_size); + return -EINVAL; + } + + value_size = btf__resolve_size(btf, value->type); + if (value_size < 0) { + pr_warning("map:%s invalid BTF value_type_size\n", map_name); + return value_size; + } + + if (expected_value_size != value_size) { + pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map_name, (__u32)value_size, expected_value_size); + return -EINVAL; + } + + *key_type_id = key->type; + *value_type_id = value->type; + + return 0; +} + +struct btf_ext_sec_setup_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -512,41 +601,33 @@ struct btf_ext_sec_copy_param { const char *desc; }; -static int btf_ext_copy_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - struct btf_ext_sec_copy_param *ext_sec, - btf_print_fn_t err_log) +static int btf_ext_setup_info(struct btf_ext *btf_ext, + struct btf_ext_sec_setup_param *ext_sec) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; /* The start of the info sec (including the __u32 record_size). */ - const void *info; - - /* data and data_size do not include btf_ext_header from now on */ - data = data + hdr->hdr_len; - data_size -= hdr->hdr_len; + void *info; if (ext_sec->off & 0x03) { - elog(".BTF.ext %s section is not aligned to 4 bytes\n", + pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", ext_sec->desc); return -EINVAL; } - if (data_size < ext_sec->off || - ext_sec->len > data_size - ext_sec->off) { - elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", - ext_sec->desc, ext_sec->off, ext_sec->len); + info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; + info_left = ext_sec->len; + + if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) { + pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); return -EINVAL; } - info = data + ext_sec->off; - info_left = ext_sec->len; - /* At least a record size */ if (info_left < sizeof(__u32)) { - elog(".BTF.ext %s record size not found\n", ext_sec->desc); + pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); return -EINVAL; } @@ -554,8 +635,8 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, record_size = *(__u32 *)info; if (record_size < ext_sec->min_rec_size || record_size & 0x03) { - elog("%s section in .BTF.ext has invalid record size %u\n", - ext_sec->desc, record_size); + pr_debug("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); return -EINVAL; } @@ -564,7 +645,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - elog("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); return -EINVAL; } @@ -574,14 +655,14 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, __u32 num_records; if (info_left < sec_hdrlen) { - elog("%s section header is not found in .BTF.ext\n", + pr_debug("%s section header is not found in .BTF.ext\n", ext_sec->desc); return -EINVAL; } num_records = sinfo->num_info; if (num_records == 0) { - elog("%s section has incorrect num_records in .BTF.ext\n", + pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); return -EINVAL; } @@ -589,7 +670,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { - elog("%s section has incorrect num_records in .BTF.ext\n", + pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); return -EINVAL; } @@ -601,74 +682,64 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, ext_info = ext_sec->ext_info; ext_info->len = ext_sec->len - sizeof(__u32); ext_info->rec_size = record_size; - ext_info->info = malloc(ext_info->len); - if (!ext_info->info) - return -ENOMEM; - memcpy(ext_info->info, info + sizeof(__u32), ext_info->len); + ext_info->info = info + sizeof(__u32); return 0; } -static int btf_ext_copy_func_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - btf_print_fn_t err_log) +static int btf_ext_setup_func_info(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - struct btf_ext_sec_copy_param param = { - .off = hdr->func_info_off, - .len = hdr->func_info_len, + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->func_info_off, + .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); + return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_copy_line_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - btf_print_fn_t err_log) +static int btf_ext_setup_line_info(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - struct btf_ext_sec_copy_param param = { - .off = hdr->line_info_off, - .len = hdr->line_info_len, + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->line_info_off, + .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); + return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, - btf_print_fn_t err_log) +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; if (data_size < offsetof(struct btf_ext_header, func_info_off) || data_size < hdr->hdr_len) { - elog("BTF.ext header not found"); + pr_debug("BTF.ext header not found"); return -EINVAL; } if (hdr->magic != BTF_MAGIC) { - elog("Invalid BTF.ext magic:%x\n", hdr->magic); + pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } if (hdr->version != BTF_VERSION) { - elog("Unsupported BTF.ext version:%u\n", hdr->version); + pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } if (hdr->flags) { - elog("Unsupported BTF.ext flags:%x\n", hdr->flags); + pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags); return -ENOTSUP; } if (data_size == hdr->hdr_len) { - elog("BTF.ext has no data\n"); + pr_debug("BTF.ext has no data\n"); return -EINVAL; } @@ -679,18 +750,16 @@ void btf_ext__free(struct btf_ext *btf_ext) { if (!btf_ext) return; - - free(btf_ext->func_info.info); - free(btf_ext->line_info.info); + free(btf_ext->data); free(btf_ext); } -struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +struct btf_ext *btf_ext__new(__u8 *data, __u32 size) { struct btf_ext *btf_ext; int err; - err = btf_ext_parse_hdr(data, size, err_log); + err = btf_ext_parse_hdr(data, size); if (err) return ERR_PTR(err); @@ -698,13 +767,23 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) if (!btf_ext) return ERR_PTR(-ENOMEM); - err = btf_ext_copy_func_info(btf_ext, data, size, err_log); - if (err) { - btf_ext__free(btf_ext); - return ERR_PTR(err); + btf_ext->data_size = size; + btf_ext->data = malloc(size); + if (!btf_ext->data) { + err = -ENOMEM; + goto done; } + memcpy(btf_ext->data, data, size); - err = btf_ext_copy_line_info(btf_ext, data, size, err_log); + err = btf_ext_setup_func_info(btf_ext); + if (err) + goto done; + + err = btf_ext_setup_line_info(btf_ext); + if (err) + goto done; + +done: if (err) { btf_ext__free(btf_ext); return ERR_PTR(err); @@ -713,6 +792,12 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) return btf_ext; } +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) +{ + *size = btf_ext->data_size; + return btf_ext->data; +} + static int btf_ext_reloc_info(const struct btf *btf, const struct btf_ext_info *ext_info, const char *sec_name, __u32 insns_cnt, @@ -761,7 +846,8 @@ static int btf_ext_reloc_info(const struct btf *btf, return -ENOENT; } -int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **func_info, __u32 *cnt) { @@ -769,7 +855,8 @@ int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ex insns_cnt, func_info, cnt); } -int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **line_info, __u32 *cnt) { @@ -786,3 +873,1744 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) { return btf_ext->line_info.rec_size; } + +struct btf_dedup; + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); +static void btf_dedup_free(struct btf_dedup *d); +static int btf_dedup_strings(struct btf_dedup *d); +static int btf_dedup_prim_types(struct btf_dedup *d); +static int btf_dedup_struct_types(struct btf_dedup *d); +static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_compact_types(struct btf_dedup *d); +static int btf_dedup_remap_types(struct btf_dedup *d); + +/* + * Deduplicate BTF types and strings. + * + * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF + * section with all BTF type descriptors and string data. It overwrites that + * memory in-place with deduplicated types and strings without any loss of + * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section + * is provided, all the strings referenced from .BTF.ext section are honored + * and updated to point to the right offsets after deduplication. + * + * If function returns with error, type/string data might be garbled and should + * be discarded. + * + * More verbose and detailed description of both problem btf_dedup is solving, + * as well as solution could be found at: + * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html + * + * Problem description and justification + * ===================================== + * + * BTF type information is typically emitted either as a result of conversion + * from DWARF to BTF or directly by compiler. In both cases, each compilation + * unit contains information about a subset of all the types that are used + * in an application. These subsets are frequently overlapping and contain a lot + * of duplicated information when later concatenated together into a single + * binary. This algorithm ensures that each unique type is represented by single + * BTF type descriptor, greatly reducing resulting size of BTF data. + * + * Compilation unit isolation and subsequent duplication of data is not the only + * problem. The same type hierarchy (e.g., struct and all the type that struct + * references) in different compilation units can be represented in BTF to + * various degrees of completeness (or, rather, incompleteness) due to + * struct/union forward declarations. + * + * Let's take a look at an example, that we'll use to better understand the + * problem (and solution). Suppose we have two compilation units, each using + * same `struct S`, but each of them having incomplete type information about + * struct's fields: + * + * // CU #1: + * struct S; + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * // CU #2: + * struct S; + * struct A; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * In case of CU #1, BTF data will know only that `struct B` exist (but no + * more), but will know the complete type information about `struct A`. While + * for CU #2, it will know full type information about `struct B`, but will + * only know about forward declaration of `struct A` (in BTF terms, it will + * have `BTF_KIND_FWD` type descriptor with name `B`). + * + * This compilation unit isolation means that it's possible that there is no + * single CU with complete type information describing structs `S`, `A`, and + * `B`. Also, we might get tons of duplicated and redundant type information. + * + * Additional complication we need to keep in mind comes from the fact that + * types, in general, can form graphs containing cycles, not just DAGs. + * + * While algorithm does deduplication, it also merges and resolves type + * information (unless disabled throught `struct btf_opts`), whenever possible. + * E.g., in the example above with two compilation units having partial type + * information for structs `A` and `B`, the output of algorithm will emit + * a single copy of each BTF type that describes structs `A`, `B`, and `S` + * (as well as type information for `int` and pointers), as if they were defined + * in a single compilation unit as: + * + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * Algorithm summary + * ================= + * + * Algorithm completes its work in 6 separate passes: + * + * 1. Strings deduplication. + * 2. Primitive types deduplication (int, enum, fwd). + * 3. Struct/union types deduplication. + * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * protos, and const/volatile/restrict modifiers). + * 5. Types compaction. + * 6. Types remapping. + * + * Algorithm determines canonical type descriptor, which is a single + * representative type for each truly unique type. This canonical type is the + * one that will go into final deduplicated BTF type information. For + * struct/unions, it is also the type that algorithm will merge additional type + * information into (while resolving FWDs), as it discovers it from data in + * other CUs. Each input BTF type eventually gets either mapped to itself, if + * that type is canonical, or to some other type, if that type is equivalent + * and was chosen as canonical representative. This mapping is stored in + * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that + * FWD type got resolved to. + * + * To facilitate fast discovery of canonical types, we also maintain canonical + * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash + * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types + * that match that signature. With sufficiently good choice of type signature + * hashing function, we can limit number of canonical types for each unique type + * signature to a very small number, allowing to find canonical type for any + * duplicated type very quickly. + * + * Struct/union deduplication is the most critical part and algorithm for + * deduplicating structs/unions is described in greater details in comments for + * `btf_dedup_is_equiv` function. + */ +int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); + int err; + + if (IS_ERR(d)) { + pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + return -EINVAL; + } + + err = btf_dedup_strings(d); + if (err < 0) { + pr_debug("btf_dedup_strings failed:%d\n", err); + goto done; + } + err = btf_dedup_prim_types(d); + if (err < 0) { + pr_debug("btf_dedup_prim_types failed:%d\n", err); + goto done; + } + err = btf_dedup_struct_types(d); + if (err < 0) { + pr_debug("btf_dedup_struct_types failed:%d\n", err); + goto done; + } + err = btf_dedup_ref_types(d); + if (err < 0) { + pr_debug("btf_dedup_ref_types failed:%d\n", err); + goto done; + } + err = btf_dedup_compact_types(d); + if (err < 0) { + pr_debug("btf_dedup_compact_types failed:%d\n", err); + goto done; + } + err = btf_dedup_remap_types(d); + if (err < 0) { + pr_debug("btf_dedup_remap_types failed:%d\n", err); + goto done; + } + +done: + btf_dedup_free(d); + return err; +} + +#define BTF_DEDUP_TABLE_SIZE_LOG 14 +#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1) +#define BTF_UNPROCESSED_ID ((__u32)-1) +#define BTF_IN_PROGRESS_ID ((__u32)-2) + +struct btf_dedup_node { + struct btf_dedup_node *next; + __u32 type_id; +}; + +struct btf_dedup { + /* .BTF section to be deduped in-place */ + struct btf *btf; + /* + * Optional .BTF.ext section. When provided, any strings referenced + * from it will be taken into account when deduping strings + */ + struct btf_ext *btf_ext; + /* + * This is a map from any type's signature hash to a list of possible + * canonical representative type candidates. Hash collisions are + * ignored, so even types of various kinds can share same list of + * candidates, which is fine because we rely on subsequent + * btf_xxx_equal() checks to authoritatively verify type equality. + */ + struct btf_dedup_node **dedup_table; + /* Canonical types map */ + __u32 *map; + /* Hypothetical mapping, used during type graph equivalence checks */ + __u32 *hypot_map; + __u32 *hypot_list; + size_t hypot_cnt; + size_t hypot_cap; + /* Various option modifying behavior of algorithm */ + struct btf_dedup_opts opts; +}; + +struct btf_str_ptr { + const char *str; + __u32 new_off; + bool used; +}; + +struct btf_str_ptrs { + struct btf_str_ptr *ptrs; + const char *data; + __u32 cnt; + __u32 cap; +}; + +static inline __u32 hash_combine(__u32 h, __u32 value) +{ +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e370001UL + return h * 37 + value * GOLDEN_RATIO_PRIME; +#undef GOLDEN_RATIO_PRIME +} + +#define for_each_hash_node(table, hash, node) \ + for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next) + +static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id) +{ + struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node)); + + if (!node) + return -ENOMEM; + node->type_id = type_id; + node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD]; + d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node; + return 0; +} + +static int btf_dedup_hypot_map_add(struct btf_dedup *d, + __u32 from_id, __u32 to_id) +{ + if (d->hypot_cnt == d->hypot_cap) { + __u32 *new_list; + + d->hypot_cap += max(16, d->hypot_cap / 2); + new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); + if (!new_list) + return -ENOMEM; + d->hypot_list = new_list; + } + d->hypot_list[d->hypot_cnt++] = from_id; + d->hypot_map[from_id] = to_id; + return 0; +} + +static void btf_dedup_clear_hypot_map(struct btf_dedup *d) +{ + int i; + + for (i = 0; i < d->hypot_cnt; i++) + d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; + d->hypot_cnt = 0; +} + +static void btf_dedup_table_free(struct btf_dedup *d) +{ + struct btf_dedup_node *head, *tmp; + int i; + + if (!d->dedup_table) + return; + + for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) { + while (d->dedup_table[i]) { + tmp = d->dedup_table[i]; + d->dedup_table[i] = tmp->next; + free(tmp); + } + + head = d->dedup_table[i]; + while (head) { + tmp = head; + head = head->next; + free(tmp); + } + } + + free(d->dedup_table); + d->dedup_table = NULL; +} + +static void btf_dedup_free(struct btf_dedup *d) +{ + btf_dedup_table_free(d); + + free(d->map); + d->map = NULL; + + free(d->hypot_map); + d->hypot_map = NULL; + + free(d->hypot_list); + d->hypot_list = NULL; + + free(d); +} + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); + int i, err = 0; + + if (!d) + return ERR_PTR(-ENOMEM); + + d->btf = btf; + d->btf_ext = btf_ext; + + d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG, + sizeof(struct btf_dedup_node *)); + if (!d->dedup_table) { + err = -ENOMEM; + goto done; + } + + d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->map) { + err = -ENOMEM; + goto done; + } + /* special BTF "void" type is made canonical immediately */ + d->map[0] = 0; + for (i = 1; i <= btf->nr_types; i++) + d->map[i] = BTF_UNPROCESSED_ID; + + d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->hypot_map) { + err = -ENOMEM; + goto done; + } + for (i = 0; i <= btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + + d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; + +done: + if (err) { + btf_dedup_free(d); + return ERR_PTR(err); + } + + return d; +} + +typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx); + +/* + * Iterate over all possible places in .BTF and .BTF.ext that can reference + * string and pass pointer to it to a provided callback `fn`. + */ +static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) +{ + void *line_data_cur, *line_data_end; + int i, j, r, rec_size; + struct btf_type *t; + + for (i = 1; i <= d->btf->nr_types; i++) { + t = d->btf->types[i]; + r = fn(&t->name_off, ctx); + if (r) + return r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *m = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_ENUM: { + struct btf_enum *m = (struct btf_enum *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + struct btf_param *m = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + default: + break; + } + } + + if (!d->btf_ext) + return 0; + + line_data_cur = d->btf_ext->line_info.info; + line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len; + rec_size = d->btf_ext->line_info.rec_size; + + while (line_data_cur < line_data_end) { + struct btf_ext_info_sec *sec = line_data_cur; + struct bpf_line_info_min *line_info; + __u32 num_info = sec->num_info; + + r = fn(&sec->sec_name_off, ctx); + if (r) + return r; + + line_data_cur += sizeof(struct btf_ext_info_sec); + for (i = 0; i < num_info; i++) { + line_info = line_data_cur; + r = fn(&line_info->file_name_off, ctx); + if (r) + return r; + r = fn(&line_info->line_off, ctx); + if (r) + return r; + line_data_cur += rec_size; + } + } + + return 0; +} + +static int str_sort_by_content(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + return strcmp(p1->str, p2->str); +} + +static int str_sort_by_offset(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + if (p1->str != p2->str) + return p1->str < p2->str ? -1 : 1; + return 0; +} + +static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) +{ + const struct btf_str_ptr *p = pelem; + + if (str_ptr != p->str) + return (const char *)str_ptr < p->str ? -1 : 1; + return 0; +} + +static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + s->used = true; + return 0; +} + +static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + *str_off_ptr = s->new_off; + return 0; +} + +/* + * Dedup string and filter out those that are not referenced from either .BTF + * or .BTF.ext (if provided) sections. + * + * This is done by building index of all strings in BTF's string section, + * then iterating over all entities that can reference strings (e.g., type + * names, struct field names, .BTF.ext line info, etc) and marking corresponding + * strings as used. After that all used strings are deduped and compacted into + * sequential blob of memory and new offsets are calculated. Then all the string + * references are iterated again and rewritten using new offsets. + */ +static int btf_dedup_strings(struct btf_dedup *d) +{ + const struct btf_header *hdr = d->btf->hdr; + char *start = (char *)d->btf->nohdr_data + hdr->str_off; + char *end = start + d->btf->hdr->str_len; + char *p = start, *tmp_strs = NULL; + struct btf_str_ptrs strs = { + .cnt = 0, + .cap = 0, + .ptrs = NULL, + .data = start, + }; + int i, j, err = 0, grp_idx; + bool grp_used; + + /* build index of all strings */ + while (p < end) { + if (strs.cnt + 1 > strs.cap) { + struct btf_str_ptr *new_ptrs; + + strs.cap += max(strs.cnt / 2, 16); + new_ptrs = realloc(strs.ptrs, + sizeof(strs.ptrs[0]) * strs.cap); + if (!new_ptrs) { + err = -ENOMEM; + goto done; + } + strs.ptrs = new_ptrs; + } + + strs.ptrs[strs.cnt].str = p; + strs.ptrs[strs.cnt].used = false; + + p += strlen(p) + 1; + strs.cnt++; + } + + /* temporary storage for deduplicated strings */ + tmp_strs = malloc(d->btf->hdr->str_len); + if (!tmp_strs) { + err = -ENOMEM; + goto done; + } + + /* mark all used strings */ + strs.ptrs[0].used = true; + err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); + if (err) + goto done; + + /* sort strings by context, so that we can identify duplicates */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); + + /* + * iterate groups of equal strings and if any instance in a group was + * referenced, emit single instance and remember new offset + */ + p = tmp_strs; + grp_idx = 0; + grp_used = strs.ptrs[0].used; + /* iterate past end to avoid code duplication after loop */ + for (i = 1; i <= strs.cnt; i++) { + /* + * when i == strs.cnt, we want to skip string comparison and go + * straight to handling last group of strings (otherwise we'd + * need to handle last group after the loop w/ duplicated code) + */ + if (i < strs.cnt && + !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { + grp_used = grp_used || strs.ptrs[i].used; + continue; + } + + /* + * this check would have been required after the loop to handle + * last group of strings, but due to <= condition in a loop + * we avoid that duplication + */ + if (grp_used) { + int new_off = p - tmp_strs; + __u32 len = strlen(strs.ptrs[grp_idx].str); + + memmove(p, strs.ptrs[grp_idx].str, len + 1); + for (j = grp_idx; j < i; j++) + strs.ptrs[j].new_off = new_off; + p += len + 1; + } + + if (i < strs.cnt) { + grp_idx = i; + grp_used = strs.ptrs[i].used; + } + } + + /* replace original strings with deduped ones */ + d->btf->hdr->str_len = p - tmp_strs; + memmove(start, tmp_strs, d->btf->hdr->str_len); + end = start + d->btf->hdr->str_len; + + /* restore original order for further binary search lookups */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); + + /* remap string offsets */ + err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); + if (err) + goto done; + + d->btf->hdr->str_len = end - start; + +done: + free(tmp_strs); + free(strs.ptrs); + return err; +} + +static __u32 btf_hash_common(struct btf_type *t) +{ + __u32 h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + h = hash_combine(h, t->size); + return h; +} + +static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info && + t1->size == t2->size; +} + +/* Calculate type signature hash of INT. */ +static __u32 btf_hash_int(struct btf_type *t) +{ + __u32 info = *(__u32 *)(t + 1); + __u32 h; + + h = btf_hash_common(t); + h = hash_combine(h, info); + return h; +} + +/* Check structural equality of two INTs. */ +static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) +{ + __u32 info1, info2; + + if (!btf_equal_common(t1, t2)) + return false; + info1 = *(__u32 *)(t1 + 1); + info2 = *(__u32 *)(t2 + 1); + return info1 == info2; +} + +/* Calculate type signature hash of ENUM. */ +static __u32 btf_hash_enum(struct btf_type *t) +{ + struct btf_enum *member = (struct btf_enum *)(t + 1); + __u32 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->val); + member++; + } + return h; +} + +/* Check structural equality of two ENUMs. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_enum *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_enum *)(t1 + 1); + m2 = (struct btf_enum *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val != m2->val) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. + */ +static __u32 btf_hash_struct(struct btf_type *t) +{ + struct btf_member *member = (struct btf_member *)(t + 1); + __u32 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->offset); + /* no hashing of referenced type ID, it can be unresolved yet */ + member++; + } + return h; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_equal_struct(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_member *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_member *)(t1 + 1); + m2 = (struct btf_member *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->offset != m2->offset) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of ARRAY, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static __u32 btf_hash_array(struct btf_type *t) +{ + struct btf_array *info = (struct btf_array *)(t + 1); + __u32 h = btf_hash_common(t); + + h = hash_combine(h, info->type); + h = hash_combine(h, info->index_type); + h = hash_combine(h, info->nelems); + return h; +} + +/* + * Check exact equality of two ARRAYs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * ARRAY to potential canonical representative. + */ +static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->type == info2->type && + info1->index_type == info2->index_type && + info1->nelems == info2->nelems; +} + +/* + * Check structural compatibility of two ARRAYs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->nelems == info2->nelems; +} + +/* + * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static inline __u32 btf_hash_fnproto(struct btf_type *t) +{ + struct btf_param *member = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->type); + member++; + } + return h; +} + +/* + * Check exact equality of two FUNC_PROTOs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * FUNC_PROTO to potential canonical representative. + */ +static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->type != m2->type) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + /* skip return type ID */ + if (t1->name_off != t2->name_off || t1->info != t2->info) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Deduplicate primitive types, that can't reference other types, by calculating + * their type signature hash and comparing them with any possible canonical + * candidate. If no canonical candidate matches, type itself is marked as + * canonical and is added into `btf_dedup->dedup_table` as another candidate. + */ +static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + struct btf_type *cand; + struct btf_dedup_node *cand_node; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u32 h; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FUNC: + case BTF_KIND_FUNC_PROTO: + return 0; + + case BTF_KIND_INT: + h = btf_hash_int(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_int(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ENUM: + h = btf_hash_enum(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_enum(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_FWD: + h = btf_hash_common(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_prim_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_prim_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Check whether type is already mapped into canonical one (could be to itself). + */ +static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) +{ + return d->map[type_id] <= BTF_MAX_NR_TYPES; +} + +/* + * Resolve type ID into its canonical type ID, if any; otherwise return original + * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow + * STRUCT/UNION link and resolve it into canonical type ID as well. + */ +static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) +{ + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + return type_id; +} + +/* + * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original + * type ID. + */ +static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) +{ + __u32 orig_type_id = type_id; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + return orig_type_id; +} + + +static inline __u16 btf_fwd_kind(struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT; +} + +/* + * Check equivalence of BTF type graph formed by candidate struct/union (we'll + * call it "candidate graph" in this description for brevity) to a type graph + * formed by (potential) canonical struct/union ("canonical graph" for brevity + * here, though keep in mind that not all types in canonical graph are + * necessarily canonical representatives themselves, some of them might be + * duplicates or its uniqueness might not have been established yet). + * Returns: + * - >0, if type graphs are equivalent; + * - 0, if not equivalent; + * - <0, on error. + * + * Algorithm performs side-by-side DFS traversal of both type graphs and checks + * equivalence of BTF types at each step. If at any point BTF types in candidate + * and canonical graphs are not compatible structurally, whole graphs are + * incompatible. If types are structurally equivalent (i.e., all information + * except referenced type IDs is exactly the same), a mapping from `canon_id` to + * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). + * If a type references other types, then those referenced types are checked + * for equivalence recursively. + * + * During DFS traversal, if we find that for current `canon_id` type we + * already have some mapping in hypothetical map, we check for two possible + * situations: + * - `canon_id` is mapped to exactly the same type as `cand_id`. This will + * happen when type graphs have cycles. In this case we assume those two + * types are equivalent. + * - `canon_id` is mapped to different type. This is contradiction in our + * hypothetical mapping, because same graph in canonical graph corresponds + * to two different types in candidate graph, which for equivalent type + * graphs shouldn't happen. This condition terminates equivalence check + * with negative result. + * + * If type graphs traversal exhausts types to check and find no contradiction, + * then type graphs are equivalent. + * + * When checking types for equivalence, there is one special case: FWD types. + * If FWD type resolution is allowed and one of the types (either from canonical + * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind + * flag) and their names match, hypothetical mapping is updated to point from + * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully, + * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently. + * + * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution, + * if there are two exactly named (or anonymous) structs/unions that are + * compatible structurally, one of which has FWD field, while other is concrete + * STRUCT/UNION, but according to C sources they are different structs/unions + * that are referencing different types with the same name. This is extremely + * unlikely to happen, but btf_dedup API allows to disable FWD resolution if + * this logic is causing problems. + * + * Doing FWD resolution means that both candidate and/or canonical graphs can + * consists of portions of the graph that come from multiple compilation units. + * This is due to the fact that types within single compilation unit are always + * deduplicated and FWDs are already resolved, if referenced struct/union + * definiton is available. So, if we had unresolved FWD and found corresponding + * STRUCT/UNION, they will be from different compilation units. This + * consequently means that when we "link" FWD to corresponding STRUCT/UNION, + * type graph will likely have at least two different BTF types that describe + * same type (e.g., most probably there will be two different BTF types for the + * same 'int' primitive type) and could even have "overlapping" parts of type + * graph that describe same subset of types. + * + * This in turn means that our assumption that each type in canonical graph + * must correspond to exactly one type in candidate graph might not hold + * anymore and will make it harder to detect contradictions using hypothetical + * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION + * resolution only in canonical graph. FWDs in candidate graphs are never + * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs + * that can occur: + * - Both types in canonical and candidate graphs are FWDs. If they are + * structurally equivalent, then they can either be both resolved to the + * same STRUCT/UNION or not resolved at all. In both cases they are + * equivalent and there is no need to resolve FWD on candidate side. + * - Both types in canonical and candidate graphs are concrete STRUCT/UNION, + * so nothing to resolve as well, algorithm will check equivalence anyway. + * - Type in canonical graph is FWD, while type in candidate is concrete + * STRUCT/UNION. In this case candidate graph comes from single compilation + * unit, so there is exactly one BTF type for each unique C type. After + * resolving FWD into STRUCT/UNION, there might be more than one BTF type + * in canonical graph mapping to single BTF type in candidate graph, but + * because hypothetical mapping maps from canonical to candidate types, it's + * alright, and we still maintain the property of having single `canon_id` + * mapping to single `cand_id` (there could be two different `canon_id` + * mapped to the same `cand_id`, but it's not contradictory). + * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate + * graph is FWD. In this case we are just going to check compatibility of + * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll + * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to + * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs + * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from + * canonical graph. + */ +static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, + __u32 canon_id) +{ + struct btf_type *cand_type; + struct btf_type *canon_type; + __u32 hypot_type_id; + __u16 cand_kind; + __u16 canon_kind; + int i, eq; + + /* if both resolve to the same canonical, they must be equivalent */ + if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id)) + return 1; + + canon_id = resolve_fwd_id(d, canon_id); + + hypot_type_id = d->hypot_map[canon_id]; + if (hypot_type_id <= BTF_MAX_NR_TYPES) + return hypot_type_id == cand_id; + + if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) + return -ENOMEM; + + cand_type = d->btf->types[cand_id]; + canon_type = d->btf->types[canon_id]; + cand_kind = BTF_INFO_KIND(cand_type->info); + canon_kind = BTF_INFO_KIND(canon_type->info); + + if (cand_type->name_off != canon_type->name_off) + return 0; + + /* FWD <--> STRUCT/UNION equivalence check, if enabled */ + if (!d->opts.dont_resolve_fwds + && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + && cand_kind != canon_kind) { + __u16 real_kind; + __u16 fwd_kind; + + if (cand_kind == BTF_KIND_FWD) { + real_kind = canon_kind; + fwd_kind = btf_fwd_kind(cand_type); + } else { + real_kind = cand_kind; + fwd_kind = btf_fwd_kind(canon_type); + } + return fwd_kind == real_kind; + } + + if (cand_type->info != canon_type->info) + return 0; + + switch (cand_kind) { + case BTF_KIND_INT: + return btf_equal_int(cand_type, canon_type); + + case BTF_KIND_ENUM: + return btf_equal_enum(cand_type, canon_type); + + case BTF_KIND_FWD: + return btf_equal_common(cand_type, canon_type); + + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + + case BTF_KIND_ARRAY: { + struct btf_array *cand_arr, *canon_arr; + + if (!btf_compat_array(cand_type, canon_type)) + return 0; + cand_arr = (struct btf_array *)(cand_type + 1); + canon_arr = (struct btf_array *)(canon_type + 1); + eq = btf_dedup_is_equiv(d, + cand_arr->index_type, canon_arr->index_type); + if (eq <= 0) + return eq; + return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *cand_m, *canon_m; + __u16 vlen; + + if (!btf_equal_struct(cand_type, canon_type)) + return 0; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_m = (struct btf_member *)(cand_type + 1); + canon_m = (struct btf_member *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); + if (eq <= 0) + return eq; + cand_m++; + canon_m++; + } + + return 1; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *cand_p, *canon_p; + __u16 vlen; + + if (!btf_compat_fnproto(cand_type, canon_type)) + return 0; + eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + if (eq <= 0) + return eq; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_p = (struct btf_param *)(cand_type + 1); + canon_p = (struct btf_param *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); + if (eq <= 0) + return eq; + cand_p++; + canon_p++; + } + return 1; + } + + default: + return -EINVAL; + } + return 0; +} + +/* + * Use hypothetical mapping, produced by successful type graph equivalence + * check, to augment existing struct/union canonical mapping, where possible. + * + * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record + * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional: + * it doesn't matter if FWD type was part of canonical graph or candidate one, + * we are recording the mapping anyway. As opposed to carefulness required + * for struct/union correspondence mapping (described below), for FWD resolution + * it's not important, as by the time that FWD type (reference type) will be + * deduplicated all structs/unions will be deduped already anyway. + * + * Recording STRUCT/UNION mapping is purely a performance optimization and is + * not required for correctness. It needs to be done carefully to ensure that + * struct/union from candidate's type graph is not mapped into corresponding + * struct/union from canonical type graph that itself hasn't been resolved into + * canonical representative. The only guarantee we have is that canonical + * struct/union was determined as canonical and that won't change. But any + * types referenced through that struct/union fields could have been not yet + * resolved, so in case like that it's too early to establish any kind of + * correspondence between structs/unions. + * + * No canonical correspondence is derived for primitive types (they are already + * deduplicated completely already anyway) or reference types (they rely on + * stability of struct/union canonical relationship for equivalence checks). + */ +static void btf_dedup_merge_hypot_map(struct btf_dedup *d) +{ + __u32 cand_type_id, targ_type_id; + __u16 t_kind, c_kind; + __u32 t_id, c_id; + int i; + + for (i = 0; i < d->hypot_cnt; i++) { + cand_type_id = d->hypot_list[i]; + targ_type_id = d->hypot_map[cand_type_id]; + t_id = resolve_type_id(d, targ_type_id); + c_id = resolve_type_id(d, cand_type_id); + t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info); + c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info); + /* + * Resolve FWD into STRUCT/UNION. + * It's ok to resolve FWD into STRUCT/UNION that's not yet + * mapped to canonical representative (as opposed to + * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because + * eventually that struct is going to be mapped and all resolved + * FWDs will automatically resolve to correct canonical + * representative. This will happen before ref type deduping, + * which critically depends on stability of these mapping. This + * stability is not a requirement for STRUCT/UNION equivalence + * checks, though. + */ + if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) + d->map[c_id] = t_id; + else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) + d->map[t_id] = c_id; + + if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && + c_kind != BTF_KIND_FWD && + is_type_mapped(d, c_id) && + !is_type_mapped(d, t_id)) { + /* + * as a perf optimization, we can map struct/union + * that's part of type graph we just verified for + * equivalence. We can do that for struct/union that has + * canonical representative only, though. + */ + d->map[t_id] = c_id; + } + } +} + +/* + * Deduplicate struct/union types. + * + * For each struct/union type its type signature hash is calculated, taking + * into account type's name, size, number, order and names of fields, but + * ignoring type ID's referenced from fields, because they might not be deduped + * completely until after reference types deduplication phase. This type hash + * is used to iterate over all potential canonical types, sharing same hash. + * For each canonical candidate we check whether type graphs that they form + * (through referenced types in fields and so on) are equivalent using algorithm + * implemented in `btf_dedup_is_equiv`. If such equivalence is found and + * BTF_KIND_FWD resolution is allowed, then hypothetical mapping + * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence + * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to + * potentially map other structs/unions to their canonical representatives, + * if such relationship hasn't yet been established. This speeds up algorithm + * by eliminating some of the duplicate work. + * + * If no matching canonical representative was found, struct/union is marked + * as canonical for itself and is added into btf_dedup->dedup_table hash map + * for further look ups. + */ +static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *t; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u16 kind; + __u32 h; + + /* already deduped or is in process of deduping (loop detected) */ + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return 0; + + t = d->btf->types[type_id]; + kind = BTF_INFO_KIND(t->info); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + return 0; + + h = btf_hash_struct(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + int eq; + + btf_dedup_clear_hypot_map(d); + eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id); + if (eq < 0) + return eq; + if (!eq) + continue; + new_id = cand_node->type_id; + btf_dedup_merge_hypot_map(d); + break; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_struct_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_struct_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Deduplicate reference type. + * + * Once all primitive and struct/union types got deduplicated, we can easily + * deduplicate all other (reference) BTF types. This is done in two steps: + * + * 1. Resolve all referenced type IDs into their canonical type IDs. This + * resolution can be done either immediately for primitive or struct/union types + * (because they were deduped in previous two phases) or recursively for + * reference types. Recursion will always terminate at either primitive or + * struct/union type, at which point we can "unwind" chain of reference types + * one by one. There is no danger of encountering cycles because in C type + * system the only way to form type cycle is through struct/union, so any chain + * of reference types, even those taking part in a type cycle, will inevitably + * reach struct/union at some point. + * + * 2. Once all referenced type IDs are resolved into canonical ones, BTF type + * becomes "stable", in the sense that no further deduplication will cause + * any changes to it. With that, it's now possible to calculate type's signature + * hash (this time taking into account referenced type IDs) and loop over all + * potential canonical representatives. If no match was found, current type + * will become canonical representative of itself and will be added into + * btf_dedup->dedup_table as another possible canonical representative. + */ +static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *t, *cand; + /* if we don't find equivalent type, then we are representative type */ + __u32 new_id = type_id; + __u32 h, ref_type_id; + + if (d->map[type_id] == BTF_IN_PROGRESS_ID) + return -ELOOP; + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return resolve_type_id(d, type_id); + + t = d->btf->types[type_id]; + d->map[type_id] = BTF_IN_PROGRESS_ID; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + h = btf_hash_common(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ARRAY: { + struct btf_array *info = (struct btf_array *)(t + 1); + + ref_type_id = btf_dedup_ref_type(d, info->type); + if (ref_type_id < 0) + return ref_type_id; + info->type = ref_type_id; + + ref_type_id = btf_dedup_ref_type(d, info->index_type); + if (ref_type_id < 0) + return ref_type_id; + info->index_type = ref_type_id; + + h = btf_hash_array(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_array(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param; + __u16 vlen; + int i; + + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + vlen = BTF_INFO_VLEN(t->info); + param = (struct btf_param *)(t + 1); + for (i = 0; i < vlen; i++) { + ref_type_id = btf_dedup_ref_type(d, param->type); + if (ref_type_id < 0) + return ref_type_id; + param->type = ref_type_id; + param++; + } + + h = btf_hash_fnproto(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_fnproto(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return new_id; +} + +static int btf_dedup_ref_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_ref_type(d, i); + if (err < 0) + return err; + } + btf_dedup_table_free(d); + return 0; +} + +/* + * Compact types. + * + * After we established for each type its corresponding canonical representative + * type, we now can eliminate types that are not canonical and leave only + * canonical ones layed out sequentially in memory by copying them over + * duplicates. During compaction btf_dedup->hypot_map array is reused to store + * a map from original type ID to a new compacted type ID, which will be used + * during next phase to "fix up" type IDs, referenced from struct/union and + * reference types. + */ +static int btf_dedup_compact_types(struct btf_dedup *d) +{ + struct btf_type **new_types; + __u32 next_type_id = 1; + char *types_start, *p; + int i, len; + + /* we are going to reuse hypot_map to store compaction remapping */ + d->hypot_map[0] = 0; + for (i = 1; i <= d->btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + + types_start = d->btf->nohdr_data + d->btf->hdr->type_off; + p = types_start; + + for (i = 1; i <= d->btf->nr_types; i++) { + if (d->map[i] != i) + continue; + + len = btf_type_size(d->btf->types[i]); + if (len < 0) + return len; + + memmove(p, d->btf->types[i], len); + d->hypot_map[i] = next_type_id; + d->btf->types[next_type_id] = (struct btf_type *)p; + p += len; + next_type_id++; + } + + /* shrink struct btf's internal types index and update btf_header */ + d->btf->nr_types = next_type_id - 1; + d->btf->types_size = d->btf->nr_types; + d->btf->hdr->type_len = p - types_start; + new_types = realloc(d->btf->types, + (1 + d->btf->nr_types) * sizeof(struct btf_type *)); + if (!new_types) + return -ENOMEM; + d->btf->types = new_types; + + /* make sure string section follows type information without gaps */ + d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; + memmove(p, d->btf->strings, d->btf->hdr->str_len); + d->btf->strings = p; + p += d->btf->hdr->str_len; + + d->btf->data_size = p - (char *)d->btf->data; + return 0; +} + +/* + * Figure out final (deduplicated and compacted) type ID for provided original + * `type_id` by first resolving it into corresponding canonical type ID and + * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, + * which is populated during compaction phase. + */ +static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) +{ + __u32 resolved_type_id, new_type_id; + + resolved_type_id = resolve_type_id(d, type_id); + new_type_id = d->hypot_map[resolved_type_id]; + if (new_type_id > BTF_MAX_NR_TYPES) + return -EINVAL; + return new_type_id; +} + +/* + * Remap referenced type IDs into deduped type IDs. + * + * After BTF types are deduplicated and compacted, their final type IDs may + * differ from original ones. The map from original to a corresponding + * deduped type ID is stored in btf_dedup->hypot_map and is populated during + * compaction phase. During remapping phase we are rewriting all type IDs + * referenced from any BTF type (e.g., struct fields, func proto args, etc) to + * their final deduped type IDs. + */ +static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + int i, r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + break; + + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + break; + + case BTF_KIND_ARRAY: { + struct btf_array *arr_info = (struct btf_array *)(t + 1); + + r = btf_dedup_remap_type_id(d, arr_info->type); + if (r < 0) + return r; + arr_info->type = r; + r = btf_dedup_remap_type_id(d, arr_info->index_type); + if (r < 0) + return r; + arr_info->index_type = r; + break; + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *member = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, member->type); + if (r < 0) + return r; + member->type = r; + member++; + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, param->type); + if (r < 0) + return r; + param->type = r; + param++; + } + break; + } + + default: + return -EINVAL; + } + + return 0; +} + +static int btf_dedup_remap_types(struct btf_dedup *d) +{ + int i, r; + + for (i = 1; i <= d->btf->nr_types; i++) { + r = btf_dedup_remap_type(d, i); + if (r < 0) + return r; + } + return 0; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b0610dcdae6b..94bbc249b0f1 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -55,33 +55,46 @@ struct btf_ext_header { __u32 line_info_len; }; -typedef int (*btf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); - LIBBPF_API void btf__free(struct btf *btf); -LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); +LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, + __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id); + +LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext* btf_ext, + __u32 *size); +LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt); +LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); + +struct btf_dedup_opts { + bool dont_resolve_fwds; +}; -struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); -void btf_ext__free(struct btf_ext *btf_ext); -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *func_info_len); -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt); -__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 03bc01ca2577..b38dcbe7460a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -42,6 +42,7 @@ #include "bpf.h" #include "btf.h" #include "str_error.h" +#include "libbpf_util.h" #ifndef EM_BPF #define EM_BPF 247 @@ -53,39 +54,33 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) -__printf(1, 2) -static int __base_pr(const char *format, ...) +static int __base_pr(enum libbpf_print_level level, const char *format, + va_list args) { - va_list args; - int err; + if (level == LIBBPF_DEBUG) + return 0; - va_start(args, format); - err = vfprintf(stderr, format, args); - va_end(args); - return err; + return vfprintf(stderr, format, args); } -static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_debug; +static libbpf_print_fn_t __libbpf_pr = __base_pr; -#define __pr(func, fmt, ...) \ -do { \ - if ((func)) \ - (func)("libbpf: " fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) +void libbpf_set_print(libbpf_print_fn_t fn) +{ + __libbpf_pr = fn; +} -void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug) +__printf(2, 3) +void libbpf_print(enum libbpf_print_level level, const char *format, ...) { - __pr_warning = warn; - __pr_info = info; - __pr_debug = debug; + va_list args; + + if (!__libbpf_pr) + return; + + va_start(args, format); + __libbpf_pr(level, format, args); + va_end(args); } #define STRERR_BUFSIZE 128 @@ -312,7 +307,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, return -EINVAL; } - bzero(prog, sizeof(*prog)); + memset(prog, 0, sizeof(*prog)); prog->section_name = strdup(section_name); if (!prog->section_name) { @@ -839,9 +834,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) else if (strcmp(name, "maps") == 0) obj->efile.maps_shndx = idx; else if (strcmp(name, BTF_ELF_SEC) == 0) { - obj->btf = btf__new(data->d_buf, data->d_size, - __pr_debug); - if (IS_ERR(obj->btf)) { + obj->btf = btf__new(data->d_buf, data->d_size); + if (IS_ERR(obj->btf) || btf__load(obj->btf)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; @@ -915,8 +909,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_EXT_ELF_SEC, BTF_ELF_SEC); } else { obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, - btf_ext_data->d_size, - __pr_debug); + btf_ext_data->d_size); if (IS_ERR(obj->btf_ext)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_EXT_ELF_SEC, @@ -1057,72 +1050,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { - const struct btf_type *container_type; - const struct btf_member *key, *value; struct bpf_map_def *def = &map->def; - const size_t max_name = 256; - char container_name[max_name]; - __s64 key_size, value_size; - __s32 container_id; - - if (snprintf(container_name, max_name, "____btf_map_%s", map->name) == - max_name) { - pr_warning("map:%s length of '____btf_map_%s' is too long\n", - map->name, map->name); - return -EINVAL; - } - - container_id = btf__find_by_name(btf, container_name); - if (container_id < 0) { - pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", - map->name, container_name); - return container_id; - } - - container_type = btf__type_by_id(btf, container_id); - if (!container_type) { - pr_warning("map:%s cannot find BTF type for container_id:%u\n", - map->name, container_id); - return -EINVAL; - } - - if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || - BTF_INFO_VLEN(container_type->info) < 2) { - pr_warning("map:%s container_name:%s is an invalid container struct\n", - map->name, container_name); - return -EINVAL; - } - - key = (struct btf_member *)(container_type + 1); - value = key + 1; - - key_size = btf__resolve_size(btf, key->type); - if (key_size < 0) { - pr_warning("map:%s invalid BTF key_type_size\n", - map->name); - return key_size; - } - - if (def->key_size != key_size) { - pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map->name, (__u32)key_size, def->key_size); - return -EINVAL; - } - - value_size = btf__resolve_size(btf, value->type); - if (value_size < 0) { - pr_warning("map:%s invalid BTF value_type_size\n", map->name); - return value_size; - } + __u32 key_type_id, value_type_id; + int ret; - if (def->value_size != value_size) { - pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map->name, (__u32)value_size, def->value_size); - return -EINVAL; - } + ret = btf__get_map_kv_tids(btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + if (ret) + return ret; - map->btf_key_type_id = key->type; - map->btf_value_type_id = value->type; + map->btf_key_type_id = key_type_id; + map->btf_value_type_id = value_type_id; return 0; } @@ -1174,6 +1113,20 @@ err_free_new_name: return -errno; } +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + /* If map already created, its attributes can't be changed. */ + if (map->fd >= 0) + return -EBUSY; + + map->def.max_entries = max_entries; + + return 0; +} + static int bpf_object__probe_name(struct bpf_object *obj) { @@ -1637,7 +1590,7 @@ bpf_program__load(struct bpf_program *prog, struct bpf_prog_prep_result result; bpf_program_prep_t preprocessor = prog->preprocessor; - bzero(&result, sizeof(result)); + memset(&result, 0, sizeof(result)); err = preprocessor(prog, i, prog->insns, prog->insns_cnt, &result); if (err) { @@ -2378,6 +2331,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj) return obj ? obj->kern_version : 0; } +struct btf *bpf_object__btf(struct bpf_object *obj) +{ + return obj ? obj->btf : NULL; +} + int bpf_object__btf_fd(const struct bpf_object *obj) { return obj->btf ? btf__fd(obj->btf) : -1; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 43c77e98df6f..6c0168f8bba5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -47,17 +47,16 @@ enum libbpf_errno { LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); -/* - * __printf is defined in include/linux/compiler-gcc.h. However, - * it would be better if libbpf.h didn't depend on Linux header files. - * So instead of __printf, here we use gcc attribute directly. - */ -typedef int (*libbpf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); +enum libbpf_print_level { + LIBBPF_WARN, + LIBBPF_INFO, + LIBBPF_DEBUG, +}; -LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug); +typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, + const char *, va_list ap); + +LIBBPF_API void libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ struct bpf_object; @@ -90,6 +89,9 @@ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); + +struct btf; +LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); LIBBPF_API struct bpf_program * @@ -295,6 +297,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); LIBBPF_API void *bpf_map__priv(struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 62c680fb13d1..99dfa710c818 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -130,7 +130,21 @@ LIBBPF_0.0.2 { bpf_probe_helper; bpf_probe_map_type; bpf_probe_prog_type; + bpf_map__resize; bpf_map_lookup_elem_flags; + bpf_object__btf; bpf_object__find_map_fd_by_name; bpf_get_link_xdp_id; + btf__dedup; + btf__get_map_kv_tids; + btf__get_nr_types; + btf__get_raw_data; + btf__load; + btf_ext__free; + btf_ext__func_info_rec_size; + btf_ext__get_raw_data; + btf_ext__line_info_rec_size; + btf_ext__new; + btf_ext__reloc_func_info; + btf_ext__reloc_line_info; } LIBBPF_0.0.1; diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h new file mode 100644 index 000000000000..81ecda0cb9c9 --- /dev/null +++ b/tools/lib/bpf/libbpf_util.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __LIBBPF_LIBBPF_UTIL_H +#define __LIBBPF_LIBBPF_UTIL_H + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern void libbpf_print(enum libbpf_print_level level, + const char *format, ...) + __attribute__((format(printf, 2, 3))); + +#define __pr(level, fmt, ...) \ +do { \ + libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp index abf3fc25c9fa..fc134873bb6d 100644 --- a/tools/lib/bpf/test_libbpf.cpp +++ b/tools/lib/bpf/test_libbpf.cpp @@ -8,11 +8,11 @@ int main(int argc, char *argv[]) { /* libbpf.h */ - libbpf_set_print(NULL, NULL, NULL); + libbpf_set_print(NULL); /* bpf.h */ bpf_prog_get_fd_by_id(0); /* btf.h */ - btf__new(NULL, 0, NULL); + btf__new(NULL, 0); } diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 095aebdc5bb7..e6150f21267d 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -19,8 +19,11 @@ C2C stands for Cache To Cache. The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows you to track down the cacheline contentions. -The tool is based on x86's load latency and precise store facility events -provided by Intel CPUs. These events provide: +On x86, the tool is based on load latency and precise store facility events +provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling +with thresholding feature. + +These events provide: - memory address of the access - type of the access (load and store details) - latency (in cycles) of the load access @@ -46,7 +49,7 @@ RECORD OPTIONS -l:: --ldlat:: - Configure mem-loads latency. + Configure mem-loads latency. (x86 only) -k:: --all-kernel:: @@ -119,11 +122,16 @@ Following perf record options are configured by default: -W,-d,--phys-data,--sample-cpu Unless specified otherwise with '-e' option, following events are monitored by -default: +default on x86: cpu/mem-loads,ldlat=30/P cpu/mem-stores/P +and following on PowerPC: + + cpu/mem-loads/ + cpu/mem-stores/ + User can pass any 'perf record' option behind '--' mark, like (to enable callchains and system wide monitoring): diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index f8d2167cf3e7..199ea0f0a6c0 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -82,7 +82,7 @@ RECORD OPTIONS Be more verbose (show counter open errors, etc) --ldlat <n>:: - Specify desired latency for loads event. + Specify desired latency for loads event. (x86 only) In addition, for report all perf report options are valid, and for record all perf record options. diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 2e6595310420..ba98bd006488 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-y += kvm-stat.o libperf-y += perf_regs.o +libperf-y += mem-events.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c new file mode 100644 index 000000000000..d08311f04e95 --- /dev/null +++ b/tools/perf/arch/powerpc/util/mem-events.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "mem-events.h" + +/* PowerPC does not support 'ldlat' parameter. */ +char *perf_mem_events__name(int i) +{ + if (i == PERF_MEM_EVENTS__LOAD) + return (char *) "cpu/mem-loads/"; + + return (char *) "cpu/mem-stores/"; +} diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d079f36d342d..ac221f137ed2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1681,13 +1681,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, .force_header = false, }; struct perf_evsel *ev2; - static bool init; u64 val; - if (!init) { - perf_stat__init_shadow_stats(); - init = true; - } if (!evsel->stats) perf_evlist__alloc_stats(script->session->evlist, false); if (evsel_script(evsel->leader)->gnum++ == 0) @@ -1794,7 +1789,7 @@ static void process_event(struct perf_script *script, return; } - if (PRINT_FIELD(TRACE)) { + if (PRINT_FIELD(TRACE) && sample->raw_data) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, fp); } @@ -2359,6 +2354,8 @@ static int __cmd_script(struct perf_script *script) signal(SIGINT, sig_handler); + perf_stat__init_shadow_stats(); + /* override event processing functions */ if (script->show_task_events) { script->tool.comm = process_comm_event; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ed4583128b9c..b36061cd1ab8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2514,19 +2514,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); + bool found = false; + struct perf_evsel *evsel, *tmp; + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, "probe:vfs_getname*", &err); - if (IS_ERR(evsel)) + if (ret) return false; - if (perf_evsel__field(evsel, "pathname") == NULL) { + evlist__for_each_entry_safe(evlist, evsel, tmp) { + if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + continue; + + if (perf_evsel__field(evsel, "pathname")) { + evsel->handler = trace__vfs_getname; + found = true; + continue; + } + + list_del_init(&evsel->node); + evsel->evlist = NULL; perf_evsel__delete(evsel); - return false; } - evsel->handler = trace__vfs_getname; - perf_evlist__add(evlist, evsel); - return true; + return found; } static struct perf_evsel *perf_evsel__new_pgfault(u64 config) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index 44090a9a19f3..e952127e4fb0 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -1,6 +1,8 @@ #! /usr/bin/python # SPDX-License-Identifier: GPL-2.0 +from __future__ import print_function + import os import sys import glob @@ -8,7 +10,11 @@ import optparse import tempfile import logging import shutil -import ConfigParser + +try: + import configparser +except ImportError: + import ConfigParser as configparser def data_equal(a, b): # Allow multiple values in assignment separated by '|' @@ -100,20 +106,20 @@ class Event(dict): def equal(self, other): for t in Event.terms: log.debug(" [%s] %s %s" % (t, self[t], other[t])); - if not self.has_key(t) or not other.has_key(t): + if t not in self or t not in other: return False if not data_equal(self[t], other[t]): return False return True def optional(self): - if self.has_key('optional') and self['optional'] == '1': + if 'optional' in self and self['optional'] == '1': return True return False def diff(self, other): for t in Event.terms: - if not self.has_key(t) or not other.has_key(t): + if t not in self or t not in other: continue if not data_equal(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) @@ -134,7 +140,7 @@ class Event(dict): # - expected values assignments class Test(object): def __init__(self, path, options): - parser = ConfigParser.SafeConfigParser() + parser = configparser.SafeConfigParser() parser.read(path) log.warning("running '%s'" % path) @@ -193,7 +199,7 @@ class Test(object): return True def load_events(self, path, events): - parser_event = ConfigParser.SafeConfigParser() + parser_event = configparser.SafeConfigParser() parser_event.read(path) # The event record section header contains 'event' word, @@ -207,7 +213,7 @@ class Test(object): # Read parent event if there's any if (':' in section): base = section[section.index(':') + 1:] - parser_base = ConfigParser.SafeConfigParser() + parser_base = configparser.SafeConfigParser() parser_base.read(self.test_dir + '/' + base) base_items = parser_base.items('event') @@ -322,9 +328,9 @@ def run_tests(options): for f in glob.glob(options.test_dir + '/' + options.test): try: Test(f, options).run() - except Unsup, obj: + except Unsup as obj: log.warning("unsupp %s" % obj.getMsg()) - except Notest, obj: + except Notest as obj: log.warning("skipped %s" % obj.getMsg()) def setup_log(verbose): @@ -363,7 +369,7 @@ def main(): parser.add_option("-p", "--perf", action="store", type="string", dest="perf") parser.add_option("-v", "--verbose", - action="count", dest="verbose") + default=0, action="count", dest="verbose") options, args = parser.parse_args() if args: @@ -373,7 +379,7 @@ def main(): setup_log(options.verbose) if not options.test_dir: - print 'FAILED no -d option specified' + print('FAILED no -d option specified') sys.exit(-1) if not options.test: @@ -382,8 +388,8 @@ def main(): try: run_tests(options) - except Fail, obj: - print "FAILED %s" % obj.getMsg(); + except Fail as obj: + print("FAILED %s" % obj.getMsg()) sys.exit(-1) sys.exit(0) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 5f8501c68da4..5cbba70bcdd0 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return -1; } - is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED); + is_signed = !!(field->flags & TEP_FIELD_IS_SIGNED); if (should_be_signed && !is_signed) { pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", evsel->name, name, is_signed, should_be_signed); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1d00e5ec7906..82e16bf84466 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -224,20 +224,24 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct annotation_line *a, struct annotation_line *b) +static double disasm__cmp(struct annotation_line *a, struct annotation_line *b, + int percent_type) { int i; for (i = 0; i < a->data_nr; i++) { - if (a->data[i].percent == b->data[i].percent) + if (a->data[i].percent[percent_type] == b->data[i].percent[percent_type]) continue; - return a->data[i].percent < b->data[i].percent; + return a->data[i].percent[percent_type] - + b->data[i].percent[percent_type]; } return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al) +static void disasm_rb_tree__insert(struct annotate_browser *browser, + struct annotation_line *al) { + struct rb_root *root = &browser->entries; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct annotation_line *l; @@ -246,7 +250,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l)) + if (disasm__cmp(al, l, browser->opts->percent_type) < 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -329,7 +333,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, &pos->al); + disasm_rb_tree__insert(browser, &pos->al); } pthread_mutex_unlock(¬es->lock); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 2f3eb6d293ee..037d8ff6a634 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -24,22 +24,12 @@ #include "llvm-utils.h" #include "c++/clang-c.h" -#define DEFINE_PRINT_FN(name, level) \ -static int libbpf_##name(const char *fmt, ...) \ -{ \ - va_list args; \ - int ret; \ - \ - va_start(args, fmt); \ - ret = veprintf(level, verbose, pr_fmt(fmt), args);\ - va_end(args); \ - return ret; \ +static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), + const char *fmt, va_list args) +{ + return veprintf(1, verbose, pr_fmt(fmt), args); } -DEFINE_PRINT_FN(warning, 1) -DEFINE_PRINT_FN(info, 1) -DEFINE_PRINT_FN(debug, 1) - struct bpf_prog_priv { bool is_tp; char *sys_name; @@ -59,9 +49,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) struct bpf_object *obj; if (!libbpf_initialized) { - libbpf_set_print(libbpf_warning, - libbpf_info, - libbpf_debug); + libbpf_set_print(libbpf_perf_print); libbpf_initialized = true; } @@ -79,9 +67,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) struct bpf_object *obj; if (!libbpf_initialized) { - libbpf_set_print(libbpf_warning, - libbpf_info, - libbpf_debug); + libbpf_set_print(libbpf_perf_print); libbpf_initialized = true; } diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index 89512504551b..39c0004f2886 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module) } PM.run(*Module); - return std::move(Buffer); + return Buffer; } } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 1ccbd3342069..383674f448fc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map(); - if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out; while (isdigit(*cpu_list)) { @@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out: diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 93f74d8d3cdd..42c3e5a229d2 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { static char mem_loads_name[100]; static bool mem_loads_name__init; -char *perf_mem_events__name(int i) +char * __weak perf_mem_events__name(int i) { if (i == PERF_MEM_EVENTS__LOAD) { if (!mem_loads_name__init) { diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 897589507d97..ea523d3b248f 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -391,8 +391,10 @@ void ordered_events__free(struct ordered_events *oe) * Current buffer might not have all the events allocated * yet, we need to free only allocated ones ... */ - list_del(&oe->buffer->list); - ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + if (oe->buffer) { + list_del(&oe->buffer->list); + ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + } /* ... and continue with the rest */ list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) { diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 63f758c655d5..64d1f36dee99 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -17,6 +17,8 @@ if cc == "clang": vars[var] = sub("-mcet", "", vars[var]) if not clang_has_option("-fcf-protection"): vars[var] = sub("-fcf-protection", "", vars[var]) + if not clang_has_option("-fstack-clash-protection"): + vars[var] = sub("-fstack-clash-protection", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 66a84d5846c8..dca7dfae69ad 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -19,6 +19,20 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +#ifndef ELF32_ST_VISIBILITY +#define ELF32_ST_VISIBILITY(o) ((o) & 0x03) +#endif + +/* For ELF64 the definitions are the same. */ +#ifndef ELF64_ST_VISIBILITY +#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o) +#endif + +/* How to extract information held in the st_other field. */ +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(val) ELF64_ST_VISIBILITY (val) +#endif + typedef Elf64_Nhdr GElf_Nhdr; #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT @@ -87,6 +101,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -111,7 +130,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; } static bool elf_sym__filter(GElf_Sym *sym) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 1a2bd15c5b6e..400ee81a3043 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -10,6 +10,7 @@ TARGETS += drivers/dma-buf TARGETS += efivarfs TARGETS += exec TARGETS += filesystems +TARGETS += filesystems/binderfs TARGETS += firmware TARGETS += ftrace TARGETS += futex diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index dd093bd91aa9..e47168d1257d 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -29,3 +29,4 @@ test_netcnt test_section_names test_tcpnotify_user test_libbpf +alu32 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 383d2ff13fc7..ccffaa0a0787 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,42 +23,19 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt test_tcpnotify_user - -BPF_OBJ_FILES = \ - test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \ - test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \ - sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \ - test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \ - test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \ - sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ - get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ - xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o - -# Objects are built with default compilation flags and with sub-register -# code-gen enabled. -BPF_OBJ_FILES_DUAL_COMPILE = \ - test_pkt_access.o test_pkt_access.o test_xdp.o test_adjust_tail.o \ - test_l4lb.o test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \ - test_obj_id.o test_pkt_md_access.o test_tracepoint.o \ - test_stacktrace_map.o test_stacktrace_map.o test_stacktrace_build_id.o \ - test_stacktrace_build_id.o test_get_stack_rawtp.o \ - test_get_stack_rawtp.o test_tracepoint.o test_sk_lookup_kern.o \ - test_queue_map.o test_stack_map.o - -TEST_GEN_FILES = $(BPF_OBJ_FILES) $(BPF_OBJ_FILES_DUAL_COMPILE) - -# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor -# support which is the first version to contain both ALU32 and JMP32 -# instructions. + test_netcnt test_tcpnotify_user test_sock_fields + +BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) +TEST_GEN_FILES = $(BPF_OBJ_FILES) + +# Also test sub-register code-gen if LLVM has eBPF v3 processor support which +# contains both ALU32 and JMP32 instructions. SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \ - $(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \ + $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \ grep 'if w') ifneq ($(SUBREG_CODEGEN),) -TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES_DUAL_COMPILE)) +TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES)) endif # Order correspond to 'make run_tests' order @@ -73,7 +50,8 @@ TEST_PROGS := test_kmod.sh \ test_lirc_mode2.sh \ test_skb_cgroup_id.sh \ test_flow_dissector.sh \ - test_xdp_vlan.sh + test_xdp_vlan.sh \ + test_lwt_ip_encap.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -111,6 +89,7 @@ $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c +$(OUTPUT)/test_sock_fields: cgroup_helpers.c .PHONY: force @@ -188,7 +167,8 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \ $(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \ trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS) -$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32 +$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \ + $(ALU32_BUILD_DIR)/test_progs_32 $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ @@ -200,7 +180,7 @@ endif # Have one program compiled without "-target bpf" to test whether libbpf loads # it successfully -$(OUTPUT)/test_xdp.o: test_xdp.c +$(OUTPUT)/test_xdp.o: progs/test_xdp.c $(CLANG) $(CLANG_FLAGS) \ -O2 -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ @@ -208,7 +188,7 @@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -$(OUTPUT)/%.o: %.c +$(OUTPUT)/%.o: progs/%.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 6a0ce0f055c5..d9999f1ed1d2 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -176,6 +176,10 @@ static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) BPF_FUNC_spin_lock; static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) BPF_FUNC_spin_unlock; +static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = + (void *) BPF_FUNC_sk_fullsock; +static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = + (void *) BPF_FUNC_tcp_sock; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 315a44fa32af..a29206ebbd13 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; - int n; + int len, n, i, j = 0; fp = fopen(fcpu, "r"); if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void) exit(1); } - while (fgets(buff, sizeof(buff), fp)) { - n = sscanf(buff, "%u-%u", &start, &end); - if (n == 0) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } else if (n == 1) { - end = start; + if (!fgets(buff, sizeof(buff), fp)) { + printf("Failed to read %s!\n", fcpu); + exit(1); + } + + len = strlen(buff); + for (i = 0; i <= len; i++) { + if (buff[i] == ',' || buff[i] == '\0') { + buff[i] = '\0'; + n = sscanf(&buff[j], "%u-%u", &start, &end); + if (n <= 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; + } + possible_cpus += end - start + 1; + j = i + 1; } - possible_cpus = start == 0 ? end + 1 : 0; - break; } + fclose(fp); return possible_cpus; @@ -48,4 +58,13 @@ static inline unsigned int bpf_num_possible_cpus(void) # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +#ifndef sizeof_field +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +#endif + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 284660f5aa95..284660f5aa95 100644 --- a/tools/testing/selftests/bpf/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 1fd244d35ba9..1fd244d35ba9 100644 --- a/tools/testing/selftests/bpf/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 26397ab7b3c7..26397ab7b3c7 100644 --- a/tools/testing/selftests/bpf/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c index ce41a3475f27..ce41a3475f27 100644 --- a/tools/testing/selftests/bpf/dev_cgroup.c +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 014dba10b8a5..014dba10b8a5 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index 9f741e69cebe..9f741e69cebe 100644 --- a/tools/testing/selftests/bpf/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c index 0756303676ac..0756303676ac 100644 --- a/tools/testing/selftests/bpf/sample_map_ret0.c +++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/progs/sample_ret0.c index fec99750d6ea..fec99750d6ea 100644 --- a/tools/testing/selftests/bpf/sample_ret0.c +++ b/tools/testing/selftests/bpf/progs/sample_ret0.c diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index a91536b1c47e..a91536b1c47e 100644 --- a/tools/testing/selftests/bpf/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index 5aeaa284fc47..5aeaa284fc47 100644 --- a/tools/testing/selftests/bpf/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index 9ff8ac4b0bf6..9ff8ac4b0bf6 100644 --- a/tools/testing/selftests/bpf/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index 0f92858f6226..0f92858f6226 100644 --- a/tools/testing/selftests/bpf/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index 12a7b5c82ed6..12a7b5c82ed6 100644 --- a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 2ce7634a4012..2ce7634a4012 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_adjust_tail.c index 4cd5e860c903..4cd5e860c903 100644 --- a/tools/testing/selftests/bpf/test_adjust_tail.c +++ b/tools/testing/selftests/bpf/progs/test_adjust_tail.c diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index e5c79fe0ffdb..e5c79fe0ffdb 100644 --- a/tools/testing/selftests/bpf/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 434188c37774..434188c37774 100644 --- a/tools/testing/selftests/bpf/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index f6d9f238e00a..f6d9f238e00a 100644 --- a/tools/testing/selftests/bpf/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 1e10c9590991..1e10c9590991 100644 --- a/tools/testing/selftests/bpf/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index ba44a14e6dc4..ba44a14e6dc4 100644 --- a/tools/testing/selftests/bpf/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c index 4147130cc3b7..4147130cc3b7 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c new file mode 100644 index 000000000000..c957d6dfe6d7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct grehdr { + __be16 flags; + __be16 protocol; +}; + +SEC("encap_gre") +int bpf_lwt_encap_gre(struct __sk_buff *skb) +{ + struct encap_hdr { + struct iphdr iph; + struct grehdr greh; + } hdr; + int err; + + memset(&hdr, 0, sizeof(struct encap_hdr)); + + hdr.iph.ihl = 5; + hdr.iph.version = 4; + hdr.iph.ttl = 0x40; + hdr.iph.protocol = 47; /* IPPROTO_GRE */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + hdr.iph.saddr = 0x640110ac; /* 172.16.1.100 */ + hdr.iph.daddr = 0x641010ac; /* 172.16.16.100 */ +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + hdr.iph.saddr = 0xac100164; /* 172.16.1.100 */ + hdr.iph.daddr = 0xac101064; /* 172.16.16.100 */ +#else +#error "Fix your compiler's __BYTE_ORDER__?!" +#endif + hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr)); + + hdr.greh.protocol = skb->protocol; + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, + sizeof(struct encap_hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +SEC("encap_gre6") +int bpf_lwt_encap_gre6(struct __sk_buff *skb) +{ + struct encap_hdr { + struct ipv6hdr ip6hdr; + struct grehdr greh; + } hdr; + int err; + + memset(&hdr, 0, sizeof(struct encap_hdr)); + + hdr.ip6hdr.version = 6; + hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr)); + hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */ + hdr.ip6hdr.hop_limit = 0x40; + /* fb01::1 */ + hdr.ip6hdr.saddr.s6_addr[0] = 0xfb; + hdr.ip6hdr.saddr.s6_addr[1] = 1; + hdr.ip6hdr.saddr.s6_addr[15] = 1; + /* fb10::1 */ + hdr.ip6hdr.daddr.s6_addr[0] = 0xfb; + hdr.ip6hdr.daddr.s6_addr[1] = 0x10; + hdr.ip6hdr.daddr.s6_addr[15] = 1; + + hdr.greh.protocol = skb->protocol; + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, + sizeof(struct encap_hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index 0575751bc1bc..0575751bc1bc 100644 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c diff --git a/tools/testing/selftests/bpf/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index ce923e67e08e..ce923e67e08e 100644 --- a/tools/testing/selftests/bpf/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c diff --git a/tools/testing/selftests/bpf/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index af8cc68ed2f9..af8cc68ed2f9 100644 --- a/tools/testing/selftests/bpf/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c index 880d2963b472..880d2963b472 100644 --- a/tools/testing/selftests/bpf/test_obj_id.c +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 6e11ba11709e..6e11ba11709e 100644 --- a/tools/testing/selftests/bpf/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c index 7956302ecdf2..7956302ecdf2 100644 --- a/tools/testing/selftests/bpf/test_pkt_md_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/progs/test_queue_map.c index 87db1f9da33d..87db1f9da33d 100644 --- a/tools/testing/selftests/bpf/test_queue_map.c +++ b/tools/testing/selftests/bpf/progs/test_queue_map.c diff --git a/tools/testing/selftests/bpf/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 5b54ec637ada..5b54ec637ada 100644 --- a/tools/testing/selftests/bpf/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index e21cd736c196..e21cd736c196 100644 --- a/tools/testing/selftests/bpf/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index 68cf9829f5a7..68cf9829f5a7 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c new file mode 100644 index 000000000000..de1a43e8f610 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <linux/bpf.h> +#include <netinet/in.h> +#include <stdbool.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +enum bpf_array_idx { + SRV_IDX, + CLI_IDX, + __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") addr_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct sockaddr_in6), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") sock_result_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_sock), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") tcp_sock_result_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_tcp_sock), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") linum_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +static bool is_loopback6(__u32 *a6) +{ + return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); +} + +static void skcpy(struct bpf_sock *dst, + const struct bpf_sock *src) +{ + dst->bound_dev_if = src->bound_dev_if; + dst->family = src->family; + dst->type = src->type; + dst->protocol = src->protocol; + dst->mark = src->mark; + dst->priority = src->priority; + dst->src_ip4 = src->src_ip4; + dst->src_ip6[0] = src->src_ip6[0]; + dst->src_ip6[1] = src->src_ip6[1]; + dst->src_ip6[2] = src->src_ip6[2]; + dst->src_ip6[3] = src->src_ip6[3]; + dst->src_port = src->src_port; + dst->dst_ip4 = src->dst_ip4; + dst->dst_ip6[0] = src->dst_ip6[0]; + dst->dst_ip6[1] = src->dst_ip6[1]; + dst->dst_ip6[2] = src->dst_ip6[2]; + dst->dst_ip6[3] = src->dst_ip6[3]; + dst->dst_port = src->dst_port; + dst->state = src->state; +} + +static void tpcpy(struct bpf_tcp_sock *dst, + const struct bpf_tcp_sock *src) +{ + dst->snd_cwnd = src->snd_cwnd; + dst->srtt_us = src->srtt_us; + dst->rtt_min = src->rtt_min; + dst->snd_ssthresh = src->snd_ssthresh; + dst->rcv_nxt = src->rcv_nxt; + dst->snd_nxt = src->snd_nxt; + dst->snd_una = src->snd_una; + dst->mss_cache = src->mss_cache; + dst->ecn_flags = src->ecn_flags; + dst->rate_delivered = src->rate_delivered; + dst->rate_interval_us = src->rate_interval_us; + dst->packets_out = src->packets_out; + dst->retrans_out = src->retrans_out; + dst->total_retrans = src->total_retrans; + dst->segs_in = src->segs_in; + dst->data_segs_in = src->data_segs_in; + dst->segs_out = src->segs_out; + dst->data_segs_out = src->data_segs_out; + dst->lost_out = src->lost_out; + dst->sacked_out = src->sacked_out; + dst->bytes_received = src->bytes_received; + dst->bytes_acked = src->bytes_acked; +} + +#define RETURN { \ + linum = __LINE__; \ + bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ + return 1; \ +} + +SEC("cgroup_skb/egress") +int read_sock_fields(struct __sk_buff *skb) +{ + __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; + struct sockaddr_in6 *srv_sa6, *cli_sa6; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + __u32 linum, idx0 = 0; + + sk = skb->sk; + if (!sk || sk->state == 10) + RETURN; + + sk = bpf_sk_fullsock(sk); + if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || + !is_loopback6(sk->src_ip6)) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); + if (!srv_sa6 || !cli_sa6) + RETURN; + + if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) + idx = srv_idx; + else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) + idx = cli_idx; + else + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + RETURN; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/progs/test_sockhash_kern.c index e6755916442a..e6755916442a 100644 --- a/tools/testing/selftests/bpf/test_sockhash_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sockhash_kern.c diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/progs/test_sockmap_kern.c index 677b2ed1cc1e..677b2ed1cc1e 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.c diff --git a/tools/testing/selftests/bpf/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 40f904312090..40f904312090 100644 --- a/tools/testing/selftests/bpf/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/progs/test_stack_map.c index 31c3880e6da0..31c3880e6da0 100644 --- a/tools/testing/selftests/bpf/test_stack_map.c +++ b/tools/testing/selftests/bpf/progs/test_stack_map.c diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index d86c281e957f..d86c281e957f 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index af111af7ca1a..af111af7ca1a 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index bee3bbecc0c4..bee3bbecc0c4 100644 --- a/tools/testing/selftests/bpf/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 74f73b33a7b0..74f73b33a7b0 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c diff --git a/tools/testing/selftests/bpf/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index edbca203ce2d..edbca203ce2d 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 04bf084517e0..04bf084517e0 100644 --- a/tools/testing/selftests/bpf/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 504df69c83df..504df69c83df 100644 --- a/tools/testing/selftests/bpf/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index 5e7df8bb5b5d..5e7df8bb5b5d 100644 --- a/tools/testing/selftests/bpf/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 8d0182650653..8d0182650653 100644 --- a/tools/testing/selftests/bpf/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 5e4aac74f9d0..5e4aac74f9d0 100644 --- a/tools/testing/selftests/bpf/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c index ef9e704be140..ef9e704be140 100644 --- a/tools/testing/selftests/bpf/test_xdp_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index 365a7d2d9f5c..365a7d2d9f5c 100644 --- a/tools/testing/selftests/bpf/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c diff --git a/tools/testing/selftests/bpf/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c index 43b0ef1001ed..43b0ef1001ed 100644 --- a/tools/testing/selftests/bpf/xdp_dummy.c +++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py index 7f8200a8702b..a53ed58528d6 100755 --- a/tools/testing/selftests/bpf/tcp_client.py +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -30,12 +30,11 @@ def send(sock, s): serverPort = int(sys.argv[1]) -HostName = socket.gethostname() # create active socket sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) try: - sock.connect((HostName, serverPort)) + sock.connect(('localhost', serverPort)) except socket.error as e: sys.exit(1) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py index b39903fca4c8..0ca60d193bed 100755 --- a/tools/testing/selftests/bpf/tcp_server.py +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -35,13 +35,10 @@ MAX_PORTS = 2 serverPort = SERVER_PORT serverSocket = None -HostName = socket.gethostname() - # create passive socket serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) -host = socket.gethostname() -try: serverSocket.bind((host, 0)) +try: serverSocket.bind(('localhost', 0)) except socket.error as msg: print('bind fails: ' + str(msg)) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 179f1d8ec5bf..02d314383a9c 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -52,18 +52,10 @@ static int count_result(int err) return err; } -#define __printf(a, b) __attribute__((format(printf, a, b))) - -__printf(1, 2) -static int __base_pr(const char *format, ...) +static int __base_pr(enum libbpf_print_level level __attribute__((unused)), + const char *format, va_list args) { - va_list args; - int err; - - va_start(args, format); - err = vfprintf(stderr, format, args); - va_end(args); - return err; + return vfprintf(stderr, format, args); } #define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@ -78,12 +70,21 @@ static int __base_pr(const char *format, ...) BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_FWD_ENC(name, kind_flag) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0) + #define BTF_ARRAY_ENC(type, index_type, nr_elems) \ (type), (index_type), (nr_elems) #define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \ BTF_ARRAY_ENC(type, index_type, nr_elems) +#define BTF_STRUCT_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz) + +#define BTF_UNION_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz) + #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) @@ -99,6 +100,12 @@ static int __base_pr(const char *format, ...) #define BTF_CONST_ENC(type) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) +#define BTF_VOLATILE_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type) + +#define BTF_RESTRICT_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type) + #define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) @@ -111,6 +118,10 @@ static int __base_pr(const char *format, ...) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f +#define NAME_NTH(N) (0xffff0000 | N) +#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) +#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) + #define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 @@ -119,12 +130,14 @@ static struct args { unsigned int file_test_num; unsigned int get_info_test_num; unsigned int info_raw_test_num; + unsigned int dedup_test_num; bool raw_test; bool file_test; bool get_info_test; bool pprint_test; bool always_log; bool info_raw_test; + bool dedup_test; } args; static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@ -1889,13 +1902,12 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .descr = "func proto (TYPEDEF=>FUNC_PROTO)", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - BTF_CONST_ENC(4), /* [3] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ BTF_FUNC_PROTO_ARG_ENC(0, 1), BTF_FUNC_PROTO_ARG_ENC(0, 2), BTF_END_RAW, @@ -1909,8 +1921,6 @@ static struct btf_raw_test raw_tests[] = { .key_type_id = 1, .value_type_id = 1, .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid type_id", }, { @@ -1965,7 +1975,7 @@ static struct btf_raw_test raw_tests[] = { /* void (*)(int a, unsigned int <bad_name_off>) */ BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2), + BTF_FUNC_PROTO_ARG_ENC(0x0fffffff, 2), BTF_END_RAW, }, .str_sec = "\0a", @@ -2835,11 +2845,13 @@ static void *btf_raw_create(const struct btf_header *hdr, const char **ret_next_str) { const char *next_str = str, *end_str = str + str_sec_size; + const char **strs_idx = NULL, **tmp_strs_idx; + int strs_cap = 0, strs_cnt = 0, next_str_idx = 0; unsigned int size_needed, offset; struct btf_header *ret_hdr; - int i, type_sec_size; + int i, type_sec_size, err = 0; uint32_t *ret_types; - void *raw_btf; + void *raw_btf = NULL; type_sec_size = get_raw_sec_size(raw_types); if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) @@ -2854,17 +2866,44 @@ static void *btf_raw_create(const struct btf_header *hdr, memcpy(raw_btf, hdr, sizeof(*hdr)); offset = sizeof(*hdr); + /* Index strings */ + while ((next_str = get_next_str(next_str, end_str))) { + if (strs_cnt == strs_cap) { + strs_cap += max(16, strs_cap / 2); + tmp_strs_idx = realloc(strs_idx, + sizeof(*strs_idx) * strs_cap); + if (CHECK(!tmp_strs_idx, + "Cannot allocate memory for strs_idx")) { + err = -1; + goto done; + } + strs_idx = tmp_strs_idx; + } + strs_idx[strs_cnt++] = next_str; + next_str += strlen(next_str); + } + /* Copy type section */ ret_types = raw_btf + offset; for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) { if (raw_types[i] == NAME_TBD) { - next_str = get_next_str(next_str, end_str); - if (CHECK(!next_str, "Error in getting next_str")) { - free(raw_btf); - return NULL; + if (CHECK(next_str_idx == strs_cnt, + "Error in getting next_str #%d", + next_str_idx)) { + err = -1; + goto done; } - ret_types[i] = next_str - str; - next_str += strlen(next_str); + ret_types[i] = strs_idx[next_str_idx++] - str; + } else if (IS_NAME_NTH(raw_types[i])) { + int idx = GET_NAME_NTH_IDX(raw_types[i]); + + if (CHECK(idx <= 0 || idx > strs_cnt, + "Error getting string #%d, strs_cnt:%d", + idx, strs_cnt)) { + err = -1; + goto done; + } + ret_types[i] = strs_idx[idx-1] - str; } else { ret_types[i] = raw_types[i]; } @@ -2881,8 +2920,17 @@ static void *btf_raw_create(const struct btf_header *hdr, *btf_size = size_needed; if (ret_next_str) - *ret_next_str = next_str; + *ret_next_str = + next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL; +done: + if (err) { + if (raw_btf) + free(raw_btf); + if (strs_idx) + free(strs_idx); + return NULL; + } return raw_btf; } @@ -5551,20 +5599,463 @@ static int test_info_raw(void) return err; } +struct btf_raw_data { + __u32 raw_types[MAX_NR_RAW_U32]; + const char *str_sec; + __u32 str_sec_size; +}; + +struct btf_dedup_test { + const char *descr; + struct btf_raw_data input; + struct btf_raw_data expect; + struct btf_dedup_opts opts; +}; + +const struct btf_dedup_test dedup_tests[] = { + +{ + .descr = "dedup: unused strings filtering", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0unused\0int\0foo\0bar\0long"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: strings deduplication", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(3), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int\0int\0long int\0int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: struct example #1", + /* + * struct s { + * struct s *next; + * const int *a; + * int b[16]; + * int c; + * } + */ + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + + /* full copy of the above */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */ + BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), + BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), + BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), + BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), + BTF_PTR_ENC(9), /* [10] */ + BTF_PTR_ENC(12), /* [11] */ + BTF_CONST_ENC(7), /* [12] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"), + }, + .expect = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: all possible kinds (no duplicates)", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: no int duplicates", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, + +}; + +static int btf_type_size(const struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u16 kind = BTF_INFO_KIND(t->info); + + switch (kind) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + default: + fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); + return -EINVAL; + } +} + +static void dump_btf_strings(const char *strs, __u32 len) +{ + const char *cur = strs; + int i = 0; + + while (cur < strs + len) { + fprintf(stderr, "string #%d: '%s'\n", i, cur); + cur += strlen(cur) + 1; + i++; + } +} + +static int do_test_dedup(unsigned int test_num) +{ + const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; + __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; + const struct btf_header *test_hdr, *expect_hdr; + struct btf *test_btf = NULL, *expect_btf = NULL; + const void *test_btf_data, *expect_btf_data; + const char *ret_test_next_str, *ret_expect_next_str; + const char *test_strs, *expect_strs; + const char *test_str_cur, *test_str_end; + const char *expect_str_cur, *expect_str_end; + unsigned int raw_btf_size; + void *raw_btf; + int err = 0, i; + + fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr); + + raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types, + test->input.str_sec, test->input.str_sec_size, + &raw_btf_size, &ret_test_next_str); + if (!raw_btf) + return -1; + test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", + PTR_ERR(test_btf))) { + err = -1; + goto done; + } + + raw_btf = btf_raw_create(&hdr_tmpl, test->expect.raw_types, + test->expect.str_sec, + test->expect.str_sec_size, + &raw_btf_size, &ret_expect_next_str); + if (!raw_btf) + return -1; + expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", + PTR_ERR(expect_btf))) { + err = -1; + goto done; + } + + err = btf__dedup(test_btf, NULL, &test->opts); + if (CHECK(err, "btf_dedup failed errno:%d", err)) { + err = -1; + goto done; + } + + test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); + expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); + if (CHECK(test_btf_size != expect_btf_size, + "test_btf_size:%u != expect_btf_size:%u", + test_btf_size, expect_btf_size)) { + err = -1; + goto done; + } + + test_hdr = test_btf_data; + test_strs = test_btf_data + test_hdr->str_off; + expect_hdr = expect_btf_data; + expect_strs = expect_btf_data + expect_hdr->str_off; + if (CHECK(test_hdr->str_len != expect_hdr->str_len, + "test_hdr->str_len:%u != expect_hdr->str_len:%u", + test_hdr->str_len, expect_hdr->str_len)) { + fprintf(stderr, "\ntest strings:\n"); + dump_btf_strings(test_strs, test_hdr->str_len); + fprintf(stderr, "\nexpected strings:\n"); + dump_btf_strings(expect_strs, expect_hdr->str_len); + err = -1; + goto done; + } + + test_str_cur = test_strs; + test_str_end = test_strs + test_hdr->str_len; + expect_str_cur = expect_strs; + expect_str_end = expect_strs + expect_hdr->str_len; + while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { + size_t test_len, expect_len; + + test_len = strlen(test_str_cur); + expect_len = strlen(expect_str_cur); + if (CHECK(test_len != expect_len, + "test_len:%zu != expect_len:%zu " + "(test_str:%s, expect_str:%s)", + test_len, expect_len, test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + if (CHECK(strcmp(test_str_cur, expect_str_cur), + "test_str:%s != expect_str:%s", + test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + test_str_cur += test_len + 1; + expect_str_cur += expect_len + 1; + } + if (CHECK(test_str_cur != test_str_end, + "test_str_cur:%p != test_str_end:%p", + test_str_cur, test_str_end)) { + err = -1; + goto done; + } + + test_nr_types = btf__get_nr_types(test_btf); + expect_nr_types = btf__get_nr_types(expect_btf); + if (CHECK(test_nr_types != expect_nr_types, + "test_nr_types:%u != expect_nr_types:%u", + test_nr_types, expect_nr_types)) { + err = -1; + goto done; + } + + for (i = 1; i <= test_nr_types; i++) { + const struct btf_type *test_type, *expect_type; + int test_size, expect_size; + + test_type = btf__type_by_id(test_btf, i); + expect_type = btf__type_by_id(expect_btf, i); + test_size = btf_type_size(test_type); + expect_size = btf_type_size(expect_type); + + if (CHECK(test_size != expect_size, + "type #%d: test_size:%d != expect_size:%u", + i, test_size, expect_size)) { + err = -1; + goto done; + } + if (CHECK(memcmp((void *)test_type, + (void *)expect_type, + test_size), + "type #%d: contents differ", i)) { + err = -1; + goto done; + } + } + +done: + if (!err) + fprintf(stderr, "OK"); + if (!IS_ERR(test_btf)) + btf__free(test_btf); + if (!IS_ERR(expect_btf)) + btf__free(expect_btf); + + return err; +} + +static int test_dedup(void) +{ + unsigned int i; + int err = 0; + + if (args.dedup_test_num) + return count_result(do_test_dedup(args.dedup_test_num)); + + for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++) + err |= count_result(do_test_dedup(i)); + + return err; +} + static void usage(const char *cmd) { fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" "\t[-g btf_get_info_test_num (1 - %zu)] |\n" "\t[-f btf_file_test_num (1 - %zu)] |\n" "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" - "\t[-p (pretty print test)]]\n", + "\t[-p (pretty print test)] |\n" + "\t[-d btf_dedup_test_num (1 - %zu)]]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests)); + ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests), + ARRAY_SIZE(dedup_tests)); } static int parse_args(int argc, char **argv) { - const char *optstr = "lpk:f:r:g:"; + const char *optstr = "hlpk:f:r:g:d:"; int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { @@ -5591,12 +6082,16 @@ static int parse_args(int argc, char **argv) args.info_raw_test_num = atoi(optarg); args.info_raw_test = true; break; + case 'd': + args.dedup_test_num = atoi(optarg); + args.dedup_test = true; + break; case 'h': usage(argv[0]); exit(0); default: - usage(argv[0]); - return -1; + usage(argv[0]); + return -1; } } @@ -5632,6 +6127,14 @@ static int parse_args(int argc, char **argv) return -1; } + if (args.dedup_test_num && + (args.dedup_test_num < 1 || + args.dedup_test_num > ARRAY_SIZE(dedup_tests))) { + fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n", + ARRAY_SIZE(dedup_tests)); + return -1; + } + return 0; } @@ -5650,7 +6153,7 @@ int main(int argc, char **argv) return err; if (args.always_log) - libbpf_set_print(__base_pr, __base_pr, __base_pr); + libbpf_set_print(__base_pr); if (args.raw_test) err |= test_raw(); @@ -5667,14 +6170,18 @@ int main(int argc, char **argv) if (args.info_raw_test) err |= test_info_raw(); + if (args.dedup_test) + err |= test_dedup(); + if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test || args.info_raw_test) + args.pprint_test || args.info_raw_test || args.dedup_test) goto done; err |= test_raw(); err |= test_get_info(); err |= test_file(); err |= test_info_raw(); + err |= test_dedup(); done: print_summary(); diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c index 8fcd1c076add..1909ecf4d999 100644 --- a/tools/testing/selftests/bpf/test_libbpf_open.c +++ b/tools/testing/selftests/bpf/test_libbpf_open.c @@ -34,23 +34,16 @@ static void usage(char *argv[]) printf("\n"); } -#define DEFINE_PRINT_FN(name, enabled) \ -static int libbpf_##name(const char *fmt, ...) \ -{ \ - va_list args; \ - int ret; \ - \ - va_start(args, fmt); \ - if (enabled) { \ - fprintf(stderr, "[" #name "] "); \ - ret = vfprintf(stderr, fmt, args); \ - } \ - va_end(args); \ - return ret; \ +static bool debug = 0; +static int libbpf_debug_print(enum libbpf_print_level level, + const char *fmt, va_list args) +{ + if (level == LIBBPF_DEBUG && !debug) + return 0; + + fprintf(stderr, "[%d] ", level); + return vfprintf(stderr, fmt, args); } -DEFINE_PRINT_FN(warning, 1) -DEFINE_PRINT_FN(info, 1) -DEFINE_PRINT_FN(debug, 1) #define EXIT_FAIL_LIBBPF EXIT_FAILURE #define EXIT_FAIL_OPTION 2 @@ -120,15 +113,14 @@ int main(int argc, char **argv) int longindex = 0; int opt; - libbpf_set_print(libbpf_warning, libbpf_info, NULL); + libbpf_set_print(libbpf_debug_print); /* Parse commands line args */ while ((opt = getopt_long(argc, argv, "hDq", long_options, &longindex)) != -1) { switch (opt) { case 'D': - libbpf_set_print(libbpf_warning, libbpf_info, - libbpf_debug); + debug = 1; break; case 'q': /* Use in scripting mode */ verbose = 0; diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh new file mode 100755 index 000000000000..612632c1425f --- /dev/null +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -0,0 +1,376 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Setup/topology: +# +# NS1 NS2 NS3 +# veth1 <---> veth2 veth3 <---> veth4 (the top route) +# veth5 <---> veth6 veth7 <---> veth8 (the bottom route) +# +# each vethN gets IPv[4|6]_N address +# +# IPv*_SRC = IPv*_1 +# IPv*_DST = IPv*_4 +# +# all tests test pings from IPv*_SRC to IPv*_DST +# +# by default, routes are configured to allow packets to go +# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route) +# +# a GRE device is installed in NS3 with IPv*_GRE, and +# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8 +# (the bottom route) +# +# Tests: +# +# 1. routes NS2->IPv*_DST are brought down, so the only way a ping +# from IP*_SRC to IP*_DST can work is via IPv*_GRE +# +# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1 +# that encaps the packets with an IP/GRE header to route to IPv*_GRE +# +# ping: SRC->[encap at veth1:egress]->GRE:decap->DST +# ping replies go DST->SRC directly +# +# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2 +# that encaps the packets with an IP/GRE header to route to IPv*_GRE +# +# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST +# ping replies go DST->SRC directly + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" +readonly NS3="ns3-$(mktemp -u XXXXXX)" + +readonly IPv4_1="172.16.1.100" +readonly IPv4_2="172.16.2.100" +readonly IPv4_3="172.16.3.100" +readonly IPv4_4="172.16.4.100" +readonly IPv4_5="172.16.5.100" +readonly IPv4_6="172.16.6.100" +readonly IPv4_7="172.16.7.100" +readonly IPv4_8="172.16.8.100" +readonly IPv4_GRE="172.16.16.100" + +readonly IPv4_SRC=$IPv4_1 +readonly IPv4_DST=$IPv4_4 + +readonly IPv6_1="fb01::1" +readonly IPv6_2="fb02::1" +readonly IPv6_3="fb03::1" +readonly IPv6_4="fb04::1" +readonly IPv6_5="fb05::1" +readonly IPv6_6="fb06::1" +readonly IPv6_7="fb07::1" +readonly IPv6_8="fb08::1" +readonly IPv6_GRE="fb10::1" + +readonly IPv6_SRC=$IPv6_1 +readonly IPv6_DST=$IPv6_4 + +TEST_STATUS=0 +TESTS_SUCCEEDED=0 +TESTS_FAILED=0 + +process_test_results() +{ + if [[ "${TEST_STATUS}" -eq 0 ]] ; then + echo "PASS" + TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1)) + else + echo "FAIL" + TESTS_FAILED=$((TESTS_FAILED+1)) + fi +} + +print_test_summary_and_exit() +{ + echo "passed tests: ${TESTS_SUCCEEDED}" + echo "failed tests: ${TESTS_FAILED}" + if [ "${TESTS_FAILED}" -eq "0" ] ; then + exit 0 + else + exit 1 + fi +} + +setup() +{ + set -e # exit on error + TEST_STATUS=0 + + # create devices and namespaces + ip netns add "${NS1}" + ip netns add "${NS2}" + ip netns add "${NS3}" + + ip link add veth1 type veth peer name veth2 + ip link add veth3 type veth peer name veth4 + ip link add veth5 type veth peer name veth6 + ip link add veth7 type veth peer name veth8 + + ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip link set veth1 netns ${NS1} + ip link set veth2 netns ${NS2} + ip link set veth3 netns ${NS2} + ip link set veth4 netns ${NS3} + ip link set veth5 netns ${NS1} + ip link set veth6 netns ${NS2} + ip link set veth7 netns ${NS2} + ip link set veth8 netns ${NS3} + + # configure addesses: the top route (1-2-3-4) + ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1 + ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2 + ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3 + ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4 + ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1 + ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2 + ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3 + ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4 + + # configure addresses: the bottom route (5-6-7-8) + ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5 + ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6 + ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7 + ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8 + ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5 + ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6 + ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7 + ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8 + + + ip -netns ${NS1} link set dev veth1 up + ip -netns ${NS2} link set dev veth2 up + ip -netns ${NS2} link set dev veth3 up + ip -netns ${NS3} link set dev veth4 up + ip -netns ${NS1} link set dev veth5 up + ip -netns ${NS2} link set dev veth6 up + ip -netns ${NS2} link set dev veth7 up + ip -netns ${NS3} link set dev veth8 up + + # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default; + # the bottom route to specific bottom addresses + + # NS1 + # top route + ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 + ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top by default + ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 + ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top by default + # bottom route + ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 + ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} + ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} + ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 + ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} + ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} + + # NS2 + # top route + ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 + ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 + ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 + ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 + # bottom route + ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 + ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 + ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 + ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 + + # NS3 + # top route + ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4 + ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3} + ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3} + ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4 + ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3} + ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3} + # bottom route + ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8 + ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7} + ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7} + ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8 + ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7} + ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7} + + # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route + ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255 + ip -netns ${NS3} link set gre_dev up + ip -netns ${NS3} addr add ${IPv4_GRE} nodad dev gre_dev + ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} + ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} + + + # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route + ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255 + ip -netns ${NS3} link set gre6_dev up + ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev + ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} + ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} + + # rp_filter gets confused by what these tests are doing, so disable it + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 + + sleep 1 # reduce flakiness + set +e +} + +cleanup() +{ + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null + ip netns del ${NS3} 2> /dev/null +} + +trap cleanup EXIT + +remove_routes_to_gredev() +{ + ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 + ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 + ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 + ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 +} + +add_unreachable_routes_to_gredev() +{ + ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 + ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 + ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 + ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 +} + +test_ping() +{ + local readonly PROTO=$1 + local readonly EXPECTED=$2 + local RET=0 + + if [ "${PROTO}" == "IPv4" ] ; then + ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null + RET=$? + elif [ "${PROTO}" == "IPv6" ] ; then + ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null + RET=$? + else + echo " test_ping: unknown PROTO: ${PROTO}" + TEST_STATUS=1 + fi + + if [ "0" != "${RET}" ]; then + RET=1 + fi + + if [ "${EXPECTED}" != "${RET}" ] ; then + echo " test_ping failed: expected: ${EXPECTED}; got ${RET}" + TEST_STATUS=1 + fi +} + +test_egress() +{ + local readonly ENCAP=$1 + echo "starting egress ${ENCAP} encap test" + setup + + # by default, pings work + test_ping IPv4 0 + test_ping IPv6 0 + + # remove NS2->DST routes, ping fails + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 + test_ping IPv4 1 + test_ping IPv6 1 + + # install replacement routes (LWT/eBPF), pings succeed + if [ "${ENCAP}" == "IPv4" ] ; then + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1 + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1 + elif [ "${ENCAP}" == "IPv6" ] ; then + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 + else + echo " unknown encap ${ENCAP}" + TEST_STATUS=1 + fi + test_ping IPv4 0 + test_ping IPv6 0 + + # a negative test: remove routes to GRE devices: ping fails + remove_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + + # another negative test + add_unreachable_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + + cleanup + process_test_results +} + +test_ingress() +{ + local readonly ENCAP=$1 + echo "starting ingress ${ENCAP} encap test" + setup + + # need to wait a bit for IPv6 to autoconf, otherwise + # ping6 sometimes fails with "unable to bind to address" + + # by default, pings work + test_ping IPv4 0 + test_ping IPv6 0 + + # remove NS2->DST routes, pings fail + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 + test_ping IPv4 1 + test_ping IPv6 1 + + # install replacement routes (LWT/eBPF), pings succeed + if [ "${ENCAP}" == "IPv4" ] ; then + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2 + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2 + elif [ "${ENCAP}" == "IPv6" ] ; then + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2 + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2 + else + echo "FAIL: unknown encap ${ENCAP}" + fi + test_ping IPv4 0 + test_ping IPv6 0 + + # a negative test: remove routes to GRE devices: ping fails + remove_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + + # another negative test + add_unreachable_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + + cleanup + process_test_results +} + +test_egress IPv4 +test_egress IPv6 +test_ingress IPv4 +test_ingress IPv6 + +print_test_summary_and_exit diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index e7798dd97f4b..3c627771f965 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -45,7 +45,7 @@ static int map_flags; } \ }) -static void test_hashmap(int task, void *data) +static void test_hashmap(unsigned int task, void *data) { long long key, next_key, first_key, value; int fd; @@ -135,7 +135,7 @@ static void test_hashmap(int task, void *data) close(fd); } -static void test_hashmap_sizes(int task, void *data) +static void test_hashmap_sizes(unsigned int task, void *data) { int fd, i, j; @@ -155,7 +155,7 @@ static void test_hashmap_sizes(int task, void *data) } } -static void test_hashmap_percpu(int task, void *data) +static void test_hashmap_percpu(unsigned int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); BPF_DECLARE_PERCPU(long, value); @@ -282,7 +282,7 @@ static int helper_fill_hashmap(int max_entries) return fd; } -static void test_hashmap_walk(int task, void *data) +static void test_hashmap_walk(unsigned int task, void *data) { int fd, i, max_entries = 1000; long long key, value, next_key; @@ -353,7 +353,7 @@ static void test_hashmap_zero_seed(void) close(second); } -static void test_arraymap(int task, void *data) +static void test_arraymap(unsigned int task, void *data) { int key, next_key, fd; long long value; @@ -408,7 +408,7 @@ static void test_arraymap(int task, void *data) close(fd); } -static void test_arraymap_percpu(int task, void *data) +static void test_arraymap_percpu(unsigned int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); BPF_DECLARE_PERCPU(long, values); @@ -504,7 +504,7 @@ static void test_arraymap_percpu_many_keys(void) close(fd); } -static void test_devmap(int task, void *data) +static void test_devmap(unsigned int task, void *data) { int fd; __u32 key, value; @@ -519,7 +519,7 @@ static void test_devmap(int task, void *data) close(fd); } -static void test_queuemap(int task, void *data) +static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; __u32 vals[MAP_SIZE + MAP_SIZE/2], val; @@ -577,7 +577,7 @@ static void test_queuemap(int task, void *data) close(fd); } -static void test_stackmap(int task, void *data) +static void test_stackmap(unsigned int task, void *data) { const int MAP_SIZE = 32; __u32 vals[MAP_SIZE + MAP_SIZE/2], val; @@ -642,7 +642,7 @@ static void test_stackmap(int task, void *data) #define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o" #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o" #define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o" -static void test_sockmap(int tasks, void *data) +static void test_sockmap(unsigned int tasks, void *data) { struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break; int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break; @@ -1268,10 +1268,11 @@ static void test_map_large(void) } #define run_parallel(N, FN, DATA) \ - printf("Fork %d tasks to '" #FN "'\n", N); \ + printf("Fork %u tasks to '" #FN "'\n", N); \ __run_parallel(N, FN, DATA) -static void __run_parallel(int tasks, void (*fn)(int task, void *data), +static void __run_parallel(unsigned int tasks, + void (*fn)(unsigned int task, void *data), void *data) { pid_t pid[tasks]; @@ -1312,7 +1313,7 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 -static void test_update_delete(int fn, void *data) +static void test_update_delete(unsigned int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index d59642e70f56..84bea3985d64 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -23,6 +23,7 @@ import string import struct import subprocess import time +import traceback logfile = None log_level = 1 @@ -78,7 +79,9 @@ def fail(cond, msg): if not cond: return print("FAIL: " + msg) - log("FAIL: " + msg, "", level=1) + tb = "".join(traceback.extract_stack().format()) + print(tb) + log("FAIL: " + msg, tb, level=1) os.sys.exit(1) def start_test(msg): @@ -589,6 +592,15 @@ def check_verifier_log(output, reference): return fail(True, "Missing or incorrect message from netdevsim in verifier log") +def check_multi_basic(two_xdps): + fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") + fail("prog" in two_xdps, "Base program reported in multi program mode") + fail(len(two_xdps["attached"]) != 2, + "Wrong attached program count with two programs") + fail(two_xdps["attached"][0]["prog"]["id"] == + two_xdps["attached"][1]["prog"]["id"], + "Offloaded and other programs have the same id") + def test_spurios_extack(sim, obj, skip_hw, needle): res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw, include_stderr=True) @@ -600,6 +612,67 @@ def test_spurios_extack(sim, obj, skip_hw, needle): include_stderr=True) check_no_extack(res, needle) +def test_multi_prog(sim, obj, modename, modeid): + start_test("Test multi-attachment XDP - %s + offload..." % + (modename or "default", )) + sim.set_xdp(obj, "offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + fail("prog" not in xdp, "Base program not reported in single program mode") + fail(len(xdp["attached"]) != 1, + "Wrong attached program count with one program") + + sim.set_xdp(obj, modename) + two_xdps = sim.ip_link_show(xdp=True)["xdp"] + + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after other activated") + check_multi_basic(two_xdps) + + offloaded2 = sim.dfs_read("bpf_offloaded_id") + fail(offloaded != offloaded2, + "Offload ID changed after loading other program") + + start_test("Test multi-attachment XDP - replace...") + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(ret == 0, "Replaced one of programs without -force") + check_extack(err, "XDP program already attached.", args) + + if modename == "" or modename == "drv": + othermode = "" if modename == "drv" else "drv" + start_test("Test multi-attachment XDP - detach...") + ret, _, err = sim.unset_xdp(othermode, force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode") + check_extack(err, "program loaded with different flags.", args) + + sim.unset_xdp("offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + + fail(xdp["mode"] != modeid, "Bad mode reported after multiple programs") + fail("prog" not in xdp, + "Base program not reported after multi program mode") + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after other activated") + fail(len(xdp["attached"]) != 1, + "Wrong attached program count with remaining programs") + fail(offloaded != "0", "Offload ID reported with only other program left") + + start_test("Test multi-attachment XDP - reattach...") + sim.set_xdp(obj, "offload") + two_xdps = sim.ip_link_show(xdp=True)["xdp"] + + fail(xdp["attached"][0] not in two_xdps["attached"], + "Other program not reported after offload activated") + check_multi_basic(two_xdps) + + start_test("Test multi-attachment XDP - device remove...") + sim.remove() + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + return sim # Parse command line parser = argparse.ArgumentParser() @@ -842,7 +915,9 @@ try: ret, _, err = sim.set_xdp(obj, "generic", force=True, fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") - fail(err.count("File exists") != 1, "Replaced driver XDP with generic") + check_extack(err, + "native and generic XDP can't be active at the same time.", + args) ret, _, err = sim.set_xdp(obj, "", force=True, fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") @@ -931,59 +1006,9 @@ try: rm(pin_file) bpftool_prog_list_wait(expected=0) - start_test("Test multi-attachment XDP - attach...") - sim.set_xdp(obj, "offload") - xdp = sim.ip_link_show(xdp=True)["xdp"] - offloaded = sim.dfs_read("bpf_offloaded_id") - fail("prog" not in xdp, "Base program not reported in single program mode") - fail(len(ipl["xdp"]["attached"]) != 1, - "Wrong attached program count with one program") - - sim.set_xdp(obj, "") - two_xdps = sim.ip_link_show(xdp=True)["xdp"] - offloaded2 = sim.dfs_read("bpf_offloaded_id") - - fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") - fail("prog" in two_xdps, "Base program reported in multi program mode") - fail(xdp["attached"][0] not in two_xdps["attached"], - "Offload program not reported after driver activated") - fail(len(two_xdps["attached"]) != 2, - "Wrong attached program count with two programs") - fail(two_xdps["attached"][0]["prog"]["id"] == - two_xdps["attached"][1]["prog"]["id"], - "offloaded and drv programs have the same id") - fail(offloaded != offloaded2, - "offload ID changed after loading driver program") - - start_test("Test multi-attachment XDP - replace...") - ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) - fail(err.count("busy") != 1, "Replaced one of programs without -force") - - start_test("Test multi-attachment XDP - detach...") - ret, _, err = sim.unset_xdp("drv", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) - - sim.unset_xdp("offload") - xdp = sim.ip_link_show(xdp=True)["xdp"] - offloaded = sim.dfs_read("bpf_offloaded_id") - - fail(xdp["mode"] != 1, "Bad mode reported after multiple programs") - fail("prog" not in xdp, - "Base program not reported after multi program mode") - fail(xdp["attached"][0] not in two_xdps["attached"], - "Offload program not reported after driver activated") - fail(len(ipl["xdp"]["attached"]) != 1, - "Wrong attached program count with remaining programs") - fail(offloaded != "0", "offload ID reported with only driver program left") - - start_test("Test multi-attachment XDP - device remove...") - sim.set_xdp(obj, "offload") - sim.remove() - - sim = NetdevSim() - sim.set_ethtool_tc_offloads(True) + sim = test_multi_prog(sim, obj, "", 1) + sim = test_multi_prog(sim, obj, "drv", 1) + sim = test_multi_prog(sim, obj, "generic", 2) start_test("Test mixing of TC and XDP...") sim.tc_add_ingress() diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index a08d026ac396..c52bd90fbb34 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -10,6 +10,7 @@ #include <string.h> #include <assert.h> #include <stdlib.h> +#include <stdarg.h> #include <time.h> #include <linux/types.h> @@ -1783,6 +1784,15 @@ static void test_task_fd_query_tp(void) "sys_enter_read"); } +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG) + return 0; + + return vfprintf(stderr, format, args); +} + static void test_reference_tracking() { const char *file = "./test_sk_lookup_kern.o"; @@ -1809,9 +1819,9 @@ static void test_reference_tracking() /* Expect verifier failure if test name has 'fail' */ if (strstr(title, "fail") != NULL) { - libbpf_set_print(NULL, NULL, NULL); + libbpf_set_print(NULL); err = !bpf_program__load(prog, "GPL", 0); - libbpf_set_print(printf, printf, NULL); + libbpf_set_print(libbpf_debug_print); } else { err = bpf_program__load(prog, "GPL", 0); } diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 561ffb6d6433..fb679ac3d4b0 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -20,6 +20,7 @@ #define MAX_INSNS 512 char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static bool verbose = false; struct sock_test { const char *descr; @@ -325,6 +326,7 @@ static int load_sock_prog(const struct bpf_insn *prog, enum bpf_attach_type attach_type) { struct bpf_load_program_attr attr; + int ret; memset(&attr, 0, sizeof(struct bpf_load_program_attr)); attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; @@ -332,8 +334,13 @@ static int load_sock_prog(const struct bpf_insn *prog, attr.insns = prog; attr.insns_cnt = probe_prog_length(attr.insns); attr.license = "GPL"; + attr.log_level = 2; - return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + if (verbose && ret < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + + return ret; } static int attach_sock_prog(int cgfd, int progfd, diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c new file mode 100644 index 000000000000..9bb58369b481 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <sys/socket.h> +#include <sys/epoll.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "cgroup_helpers.h" + +enum bpf_array_idx { + SRV_IDX, + CLI_IDX, + __NR_BPF_ARRAY_IDX, +}; + +#define CHECK(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ + printf(format); \ + printf("\n"); \ + exit(-1); \ + } \ +}) + +#define TEST_CGROUP "/test-bpf-sock-fields" +#define DATA "Hello BPF!" +#define DATA_LEN sizeof(DATA) + +static struct sockaddr_in6 srv_sa6, cli_sa6; +static int linum_map_fd; +static int addr_map_fd; +static int tp_map_fd; +static int sk_map_fd; +static __u32 srv_idx = SRV_IDX; +static __u32 cli_idx = CLI_IDX; + +static void init_loopback6(struct sockaddr_in6 *sa6) +{ + memset(sa6, 0, sizeof(*sa6)); + sa6->sin6_family = AF_INET6; + sa6->sin6_addr = in6addr_loopback; +} + +static void print_sk(const struct bpf_sock *sk) +{ + char src_ip4[24], dst_ip4[24]; + char src_ip6[64], dst_ip6[64]; + + inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); + inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); + inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); + inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); + + printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " + "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " + "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", + sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, + sk->mark, sk->priority, + sk->src_ip4, src_ip4, + sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], + src_ip6, sk->src_port, + sk->dst_ip4, dst_ip4, + sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], + dst_ip6, ntohs(sk->dst_port)); +} + +static void print_tp(const struct bpf_tcp_sock *tp) +{ + printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " + "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " + "rate_delivered:%u rate_interval_us:%u packets_out:%u " + "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " + "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " + "bytes_received:%llu bytes_acked:%llu\n", + tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, + tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, + tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, + tp->packets_out, tp->retrans_out, tp->total_retrans, + tp->segs_in, tp->data_segs_in, tp->segs_out, + tp->data_segs_out, tp->lost_out, tp->sacked_out, + tp->bytes_received, tp->bytes_acked); +} + +static void check_result(void) +{ + struct bpf_tcp_sock srv_tp, cli_tp; + struct bpf_sock srv_sk, cli_sk; + __u32 linum, idx0 = 0; + int err; + + err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", + "err:%d errno:%d", err, errno); + + printf("srv_sk: "); + print_sk(&srv_sk); + printf("\n"); + + printf("cli_sk: "); + print_sk(&cli_sk); + printf("\n"); + + printf("srv_tp: "); + print_tp(&srv_tp); + printf("\n"); + + printf("cli_tp: "); + print_tp(&cli_tp); + printf("\n"); + + CHECK(srv_sk.state == 10 || + !srv_sk.state || + srv_sk.family != AF_INET6 || + srv_sk.protocol != IPPROTO_TCP || + memcmp(srv_sk.src_ip6, &in6addr_loopback, + sizeof(srv_sk.src_ip6)) || + memcmp(srv_sk.dst_ip6, &in6addr_loopback, + sizeof(srv_sk.dst_ip6)) || + srv_sk.src_port != ntohs(srv_sa6.sin6_port) || + srv_sk.dst_port != cli_sa6.sin6_port, + "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); + + CHECK(cli_sk.state == 10 || + !cli_sk.state || + cli_sk.family != AF_INET6 || + cli_sk.protocol != IPPROTO_TCP || + memcmp(cli_sk.src_ip6, &in6addr_loopback, + sizeof(cli_sk.src_ip6)) || + memcmp(cli_sk.dst_ip6, &in6addr_loopback, + sizeof(cli_sk.dst_ip6)) || + cli_sk.src_port != ntohs(cli_sa6.sin6_port) || + cli_sk.dst_port != srv_sa6.sin6_port, + "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); + + CHECK(srv_tp.data_segs_out != 1 || + srv_tp.data_segs_in || + srv_tp.snd_cwnd != 10 || + srv_tp.total_retrans || + srv_tp.bytes_acked != DATA_LEN, + "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum); + + CHECK(cli_tp.data_segs_out || + cli_tp.data_segs_in != 1 || + cli_tp.snd_cwnd != 10 || + cli_tp.total_retrans || + cli_tp.bytes_received != DATA_LEN, + "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum); +} + +static void test(void) +{ + int listen_fd, cli_fd, accept_fd, epfd, err; + struct epoll_event ev; + socklen_t addrlen; + + addrlen = sizeof(struct sockaddr_in6); + ev.events = EPOLLIN; + + epfd = epoll_create(1); + CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); + + /* Prepare listen_fd */ + listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", + listen_fd, errno); + + init_loopback6(&srv_sa6); + err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); + CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); + + err = listen(listen_fd, 1); + CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); + + /* Prepare cli_fd */ + cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); + + init_loopback6(&cli_sa6); + err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); + CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); + + err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); + CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", + err, errno); + + /* Update addr_map with srv_sa6 and cli_sa6 */ + err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); + CHECK(err, "map_update", "err:%d errno:%d", err, errno); + + err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); + CHECK(err, "map_update", "err:%d errno:%d", err, errno); + + /* Connect from cli_sa6 to srv_sa6 */ + err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); + printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", + ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); + CHECK(err && errno != EINPROGRESS, + "connect(cli_fd)", "err:%d errno:%d", err, errno); + + ev.data.fd = listen_fd; + err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", + err, errno); + + /* Accept the connection */ + /* Have some timeout in accept(listen_fd). Just in case. */ + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != listen_fd, + "epoll_wait(listen_fd)", + "err:%d errno:%d ev.data.fd:%d listen_fd:%d", + err, errno, ev.data.fd, listen_fd); + + accept_fd = accept(listen_fd, NULL, NULL); + CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", + accept_fd, errno); + close(listen_fd); + + /* Send some data from accept_fd to cli_fd */ + err = send(accept_fd, DATA, DATA_LEN, 0); + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", + err, errno); + + /* Have some timeout in recv(cli_fd). Just in case. */ + ev.data.fd = cli_fd; + err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", + err, errno); + + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != cli_fd, + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", + err, errno, ev.data.fd, cli_fd); + + err = recv(cli_fd, NULL, 0, MSG_TRUNC); + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + + close(epfd); + close(accept_fd); + close(cli_fd); + + check_result(); +} + +int main(int argc, char **argv) +{ + struct bpf_prog_load_attr attr = { + .file = "test_sock_fields_kern.o", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .expected_attach_type = BPF_CGROUP_INET_EGRESS, + }; + int cgroup_fd, prog_fd, err; + struct bpf_object *obj; + struct bpf_map *map; + + err = setup_cgroup_environment(); + CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d", + err, errno); + + atexit(cleanup_cgroup_environment); + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + CHECK(cgroup_fd == -1, "create_and_get_cgroup()", + "cgroup_fd:%d errno:%d", cgroup_fd, errno); + + err = join_cgroup(TEST_CGROUP); + CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); + + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", + "err:%d errno%d", err, errno); + close(cgroup_fd); + + map = bpf_object__find_map_by_name(obj, "addr_map"); + CHECK(!map, "cannot find addr_map", "(null)"); + addr_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "sock_result_map"); + CHECK(!map, "cannot find sock_result_map", "(null)"); + sk_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); + CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); + tp_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "linum_map"); + CHECK(!map, "cannot find linum_map", "(null)"); + linum_map_fd = bpf_map__fd(map); + + test(); + + bpf_object__close(obj); + cleanup_cgroup_environment(); + + printf("PASS\n"); + + return 0; +} diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c index b0195770da6a..c6c69220a569 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c @@ -100,6 +100,7 @@ .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid read past end of SK_MSG", diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 881f1c7f57a1..c660deb582f1 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -687,6 +687,7 @@ }, .errstr = "invalid bpf_context access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check skb->hash half load not permitted, unaligned 3", diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index ceb39ffa0e88..f0961c58581e 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -27,6 +27,7 @@ .data64 = { 1ULL << 63 | 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset32: BPF_X", @@ -58,6 +59,7 @@ .data64 = { 1ULL << 63 | 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset32: min/max deduction", @@ -93,6 +95,7 @@ .data64 = { -1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jeq32: BPF_X", @@ -119,6 +122,7 @@ .data64 = { 1ULL << 63 | 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jeq32: min/max deduction", @@ -154,6 +158,7 @@ .data64 = { -1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jne32: BPF_X", @@ -180,6 +185,7 @@ .data64 = { 1ULL << 63 | 2, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jne32: min/max deduction", @@ -218,6 +224,7 @@ .data64 = { 0, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jge32: BPF_X", @@ -244,6 +251,7 @@ .data64 = { (UINT_MAX - 1) | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jge32: min/max deduction", @@ -284,6 +292,7 @@ .data64 = { 0, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jgt32: BPF_X", @@ -310,6 +319,7 @@ .data64 = { (UINT_MAX - 1) | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jgt32: min/max deduction", @@ -350,6 +360,7 @@ .data64 = { INT_MAX, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jle32: BPF_X", @@ -376,6 +387,7 @@ .data64 = { UINT_MAX, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jle32: min/max deduction", @@ -416,6 +428,7 @@ .data64 = { INT_MAX - 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jlt32: BPF_X", @@ -442,6 +455,7 @@ .data64 = { (INT_MAX - 1) | 3ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jlt32: min/max deduction", @@ -482,6 +496,7 @@ .data64 = { -2, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsge32: BPF_X", @@ -508,6 +523,7 @@ .data64 = { -2, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsge32: min/max deduction", @@ -548,6 +564,7 @@ .data64 = { 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsgt32: BPF_X", @@ -574,6 +591,7 @@ .data64 = { 0x7fffffff, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsgt32: min/max deduction", @@ -614,6 +632,7 @@ .data64 = { 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsle32: BPF_X", @@ -640,6 +659,7 @@ .data64 = { 0x7fffffff | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsle32: min/max deduction", @@ -680,6 +700,7 @@ .data64 = { 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jslt32: BPF_X", @@ -706,6 +727,7 @@ .data64 = { 0x7fffffff | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jslt32: min/max deduction", diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 7e14037acfaf..8dcd4e0383d5 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -53,6 +53,7 @@ .data64 = { ~0ULL, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset: sign-extend", @@ -70,6 +71,7 @@ .result = ACCEPT, .retval = 2, .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset: known const compare", diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index dc2cc823df2b..3ed3593bd8b6 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -547,7 +547,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "cannot write into socket", + .errstr = "cannot write into sock", .result = REJECT, }, { @@ -562,7 +562,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "invalid bpf_sock access off=0 size=8", + .errstr = "invalid sock access off=0 size=8", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c new file mode 100644 index 000000000000..0ddfdf76aba5 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -0,0 +1,384 @@ +{ + "skb->sk: no NULL check", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid mem access 'sock_common_or_null'", +}, +{ + "skb->sk: sk->family [non fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "skb->sk: sk->type [fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock_common access", +}, +{ + "bpf_sk_fullsock(skb->sk): no !skb->sk check", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "type=sock_common_or_null expected=sock_common", +}, +{ + "sk_fullsock(skb->sk): no NULL check on ret", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid mem access 'sock_or_null'", +}, +{ + "sk_fullsock(skb->sk): sk->type [fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->family [non fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->state [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, state)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->type [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->protocol [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): beyond last field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "bpf_tcp_sock(skb->sk): no !skb->sk check", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "type=sock_common_or_null expected=sock_common", +}, +{ + "bpf_tcp_sock(skb->sk): no NULL check on ret", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid mem access 'tcp_sock_or_null'", +}, +{ + "bpf_tcp_sock(skb->sk): tp->snd_cwnd", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "bpf_tcp_sock(skb->sk): tp->bytes_acked", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "bpf_tcp_sock(skb->sk): beyond last field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid tcp_sock access", +}, +{ + "bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "bpf_sk_release(skb->sk)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "type=sock_common expected=sock", +}, +{ + "bpf_sk_release(bpf_sk_fullsock(skb->sk))", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "reference has not been acquired before", +}, +{ + "bpf_sk_release(bpf_tcp_sock(skb->sk))", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "type=tcp_sock expected=sock", +}, diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index d58db72fdfe8..45d43bf82f26 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -46,6 +46,7 @@ .errstr_unpriv = "attempt to corrupt spilled", .errstr = "R0 invalid mem access 'inv", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check corrupted spill/fill, LSB", diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c index d829eef372a4..781621facae4 100644 --- a/tools/testing/selftests/bpf/verifier/spin_lock.c +++ b/tools/testing/selftests/bpf/verifier/spin_lock.c @@ -83,6 +83,7 @@ .result_unpriv = REJECT, .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "spin_lock: test4 direct ld/st", @@ -112,6 +113,7 @@ .result_unpriv = REJECT, .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "spin_lock: test5 call within a locked region", diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 3e046695fad7..dbaf5be947b2 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -365,7 +365,7 @@ }, .result = REJECT, //.errstr = "same insn cannot be used with different pointers", - .errstr = "cannot write into socket", + .errstr = "cannot write into sock", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 9ab5ace83e02..4b721a77bebb 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -512,6 +512,7 @@ .fixup_map_array_48b = { 3 }, .result = ACCEPT, .retval = 0xabcdef12, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: unknown scalar += value_ptr, 3", @@ -537,6 +538,7 @@ .result_unpriv = REJECT, .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 0xabcdef12, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: unknown scalar += value_ptr, 4", @@ -559,6 +561,7 @@ .result = REJECT, .errstr = "R1 max value is outside of the array range", .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: value_ptr += unknown scalar, 1", @@ -598,6 +601,7 @@ .fixup_map_array_48b = { 3 }, .result = ACCEPT, .retval = 0xabcdef12, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: value_ptr += unknown scalar, 3", diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index bab13dd025a6..0d26b5e3f966 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -37,6 +37,10 @@ prerequisite() exit $ksft_skip fi + present_cpus=`cat $SYSFS/devices/system/cpu/present` + present_max=${present_cpus##*-} + echo "present_cpus = $present_cpus present_max = $present_max" + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -151,6 +155,8 @@ online_cpus=0 online_max=0 offline_cpus=0 offline_max=0 +present_cpus=0 +present_max=0 while getopts e:ahp: opt; do case $opt in @@ -190,9 +196,10 @@ if [ $allcpus -eq 0 ]; then online_cpu_expect_success $online_max if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max + echo -e "\t offline to online to offline: cpu $present_max" + online_cpu_expect_success $present_max + offline_cpu_expect_success $present_max + online_cpu $present_max fi exit 0 else diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh new file mode 100755 index 000000000000..5ba5bef44d5b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that blackhole routes are marked as offloaded and that packets hitting +# them are dropped by the ASIC and not by the kernel. +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + blackhole_ipv4 + blackhole_ipv6 +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp1 clsact + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $rp1 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +blackhole_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are dropped by the + # ASIC and not by the kernel + RET=0 + + ip -4 route add blackhole 198.51.100.0/30 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \ + action pass + + ip -4 route show 198.51.100.0/30 | grep -q offload + check_err $? "route not marked as offloaded when should" + + ping_do $h1 198.51.100.1 + check_fail $? "ping passed when should not" + + tc_check_packets "dev $rp1 ingress" 101 0 + check_err $? "packets trapped and not dropped by ASIC" + + log_test "IPv4 blackhole route" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower + ip -4 route del blackhole 198.51.100.0/30 +} + +blackhole_ipv6() +{ + RET=0 + + ip -6 route add blackhole 2001:db8:2::/120 + tc filter add dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \ + ip_proto icmpv6 action pass + + ip -6 route show 2001:db8:2::/120 | grep -q offload + check_err $? "route not marked as offloaded when should" + + ping6_do $h1 2001:db8:2::1 + check_fail $? "ping passed when should not" + + tc_check_packets "dev $rp1 ingress" 101 0 + check_err $? "packets trapped and not dropped by ASIC" + + log_test "IPv6 blackhole route" + + tc filter del dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower + ip -6 route del blackhole 2001:db8:2::/120 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index f1922bf597b0..4d5b880b1d96 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -9,11 +9,11 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ - delta_two_masks_one_key_test bloom_simple_test \ - bloom_complex_test bloom_delta_test" + delta_two_masks_one_key_test delta_simple_rehash_test \ + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh -source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh tcflags="skip_hw" @@ -494,6 +494,77 @@ delta_two_masks_one_key_test() log_test "delta two masks one key test ($tcflags)" } +delta_simple_rehash_test() +{ + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 0 + check_err $? "Failed to set ACL region rehash interval" + + tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_fail $? "Rehash trace was hit even when rehash should be disabled" + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 3000 + check_err $? "Failed to set ACL region rehash interval" + + sleep 1 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.1.0/25 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.3.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_err $? "Migrate trace was not hit" + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_fail $? "Migrate trace was hit when no migration should happen" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after rehash" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "delta simple rehash test ($tcflags)" +} + bloom_simple_test() { # Bloom filter requires that the eRP table is used. This test diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index a0a80e1a69e8..e7ffc79561b7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 NUM_NETIFS=6 -source ../../../../net/forwarding/lib.sh source ../../../../net/forwarding/tc_common.sh source devlink_lib_spectrum.sh diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore new file mode 100644 index 000000000000..8a5d9bf63dd4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/.gitignore @@ -0,0 +1 @@ +binderfs_test diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile new file mode 100644 index 000000000000..58cb659b56b4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := binderfs_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c new file mode 100644 index 000000000000..8c2ed962e1c7 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <linux/android/binder.h> +#include <linux/android/binderfs.h> +#include "../../kselftest.h" + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +static void write_to_file(const char *filename, const void *buf, size_t count, + int allowed_errno) +{ + int fd, saved_errno; + ssize_t ret; + + fd = open(filename, O_WRONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg("%s - Failed to open file %s\n", + strerror(errno), filename); + + ret = write_nointr(fd, buf, count); + if (ret < 0) { + if (allowed_errno && (errno == allowed_errno)) { + close(fd); + return; + } + + goto on_error; + } + + if ((size_t)ret != count) + goto on_error; + + close(fd); + return; + +on_error: + saved_errno = errno; + close(fd); + errno = saved_errno; + + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to write to file %s\n", + strerror(errno), filename); + + ksft_exit_fail_msg("Failed to write to file %s\n", filename); +} + +static void change_to_userns(void) +{ + int ret; + uid_t uid; + gid_t gid; + /* {g,u}id_map files only allow a max of 4096 bytes written to them */ + char idmap[4096]; + + uid = getuid(); + gid = getgid(); + + ret = unshare(CLONE_NEWUSER); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare user namespace\n", + strerror(errno)); + + write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0); + + ret = setgid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); + + ret = setuid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); +} + +static void change_to_mountns(void) +{ + int ret; + + ret = unshare(CLONE_NEWNS); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n", + strerror(errno)); + + ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to mount / as private\n", + strerror(errno)); +} + +static void rmdir_protect_errno(const char *dir) +{ + int saved_errno = errno; + (void)rmdir(dir); + errno = saved_errno; +} + +static void __do_binderfs_test(void) +{ + int fd, ret, saved_errno; + size_t len; + ssize_t wret; + bool keep = false; + struct binderfs_device device = { 0 }; + struct binder_version version = { 0 }; + + change_to_mountns(); + + ret = mkdir("/dev/binderfs", 0755); + if (ret < 0) { + if (errno != EEXIST) + ksft_exit_fail_msg( + "%s - Failed to create binderfs mountpoint\n", + strerror(errno)); + + keep = true; + } + + ret = mount(NULL, "/dev/binderfs", "binder", 0, 0); + if (ret < 0) { + if (errno != ENODEV) + ksft_exit_fail_msg("%s - Failed to mount binderfs\n", + strerror(errno)); + + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_skip( + "The Android binderfs filesystem is not available\n"); + } + + /* binderfs mount test passed */ + ksft_inc_pass_cnt(); + + memcpy(device.name, "my-binder", strlen("my-binder")); + + fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg( + "%s - Failed to open binder-control device\n", + strerror(errno)); + + ret = ioctl(fd, BINDER_CTL_ADD, &device); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to allocate new binder device\n", + strerror(errno)); + } + + ksft_print_msg( + "Allocated new binder device with major %d, minor %d, and name %s\n", + device.major, device.minor, device.name); + + /* binder device allocation test passed */ + ksft_inc_pass_cnt(); + + fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY); + if (fd < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to open my-binder device\n", + strerror(errno)); + } + + ret = ioctl(fd, BINDER_VERSION, &version); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to open perform BINDER_VERSION request\n", + strerror(errno)); + } + + ksft_print_msg("Detected binder version: %d\n", + version.protocol_version); + + /* binder transaction with binderfs binder device passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/my-binder"); + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to delete binder device\n", + strerror(errno)); + } + + /* binder device removal passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/binder-control"); + if (!ret) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("Managed to delete binder-control device\n"); + } else if (errno != EPERM) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to delete binder-control device but exited with unexpected error code\n", + strerror(errno)); + } + + /* binder-control device removal failed as expected */ + ksft_inc_xfail_cnt(); + +on_error: + ret = umount2("/dev/binderfs", MNT_DETACH); + keep ?: rmdir_protect_errno("/dev/binderfs"); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unmount binderfs\n", + strerror(errno)); + + /* binderfs unmount test passed */ + ksft_inc_pass_cnt(); +} + +static void binderfs_test_privileged() +{ + if (geteuid() != 0) + ksft_print_msg( + "Tests are not run as root. Skipping privileged tests\n"); + else + __do_binderfs_test(); +} + +static void binderfs_test_unprivileged() +{ + change_to_userns(); + __do_binderfs_test(); +} + +int main(int argc, char *argv[]) +{ + binderfs_test_privileged(); + binderfs_test_unprivileged(); + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config new file mode 100644 index 000000000000..02dd6cc9cf99 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/config @@ -0,0 +1,3 @@ +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDERFS=y +CONFIG_ANDROID_BINDER_IPC=y diff --git a/tools/testing/selftests/ir/Makefile b/tools/testing/selftests/ir/Makefile index f4ba8eb84b95..ad06489c22a5 100644 --- a/tools/testing/selftests/ir/Makefile +++ b/tools/testing/selftests/ir/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ir_loopback.sh TEST_GEN_PROGS_EXTENDED := ir_loopback +APIDIR := ../../../include/uapi +CFLAGS += -Wall -O2 -I$(APIDIR) include ../lib.mk diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f8f3e90700c0..1e6d14d2825c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,6 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread $(OUTPUT)/tcp_inq: LDFLAGS += -lpthread diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 5cd2aed97958..da96eff72a8e 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -10,3 +10,5 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m CONFIG_VETH=m +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 4ac50ccb3272..47ddfc154036 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -753,6 +753,20 @@ TEST_F(tls, control_msg) EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); vec.iov_base = buf; + EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len); + + cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_NE(cmsg, NULL); + EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); + EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); + record_type = *((unsigned char *)CMSG_DATA(cmsg)); + EXPECT_EQ(record_type, 100); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); + + /* Recv the message again without MSG_PEEK */ + record_type = 0; + memset(buf, 0, sizeof(buf)); + EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); cmsg = CMSG_FIRSTHDR(&msg); EXPECT_NE(cmsg, NULL); diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb..c9ff2b47bd1c 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a8..59caa8f71cd8 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@ CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 000000000000..8ec76681605c --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain output { + type nat hook output priority 0; policy accept; + ip6 daddr dead:1::99 dnat to dead:2::99 + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add ip6 dnat hook" + return $ksft_skip + fi + + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain output { + type nat hook output priority 0; policy accept; + ip daddr 10.0.1.99 dnat to 10.0.2.99 + } +} +EOF + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - <<EOF +table inet filter { + counter ns0in {} + counter ns1in {} + counter ns2in {} + + counter ns0out {} + counter ns1out {} + counter ns2out {} + + counter ns0in6 {} + counter ns1in6 {} + counter ns2in6 {} + + counter ns0out6 {} + counter ns1out6 {} + counter ns2out6 {} + + map nsincounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0in", + 10.0.2.1 : "ns0in", + 10.0.1.99 : "ns1in", + 10.0.2.99 : "ns2in" } + } + + map nsincounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0in6", + dead:2::1 : "ns0in6", + dead:1::99 : "ns1in6", + dead:2::99 : "ns2in6" } + } + + map nsoutcounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0out", + 10.0.2.1 : "ns0out", + 10.0.1.99: "ns1out", + 10.0.2.99: "ns2out" } + } + + map nsoutcounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0out6", + dead:2::1 : "ns0out6", + dead:1::99 : "ns1out6", + dead:2::99 : "ns2out6" } + } + + chain input { + type filter hook input priority 0; policy accept; + counter name ip saddr map @nsincounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6 + } + chain output { + type filter hook output priority 0; policy accept; + counter name ip daddr map @nsoutcounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6 + } +} +EOF +done + +sleep 3 +# test basic connectivity +for i in 1 2; do + ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index 9050eeea5f5f..1de8bd8ccf5d 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -9,6 +9,3 @@ all: $(TEST_PROGS) top_srcdir = ../../../../.. KSFT_KHDR_INSTALL := 1 include ../../lib.mk - -clean: - rm -fr $(TEST_GEN_FILES) diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 82121a81681f..29bac5ef9a93 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -10,4 +10,5 @@ /proc-uptime-002 /read /self +/setns-dcache /thread-self diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 1c12c34cf85d..434d033ee067 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read TEST_GEN_PROGS += self +TEST_GEN_PROGS += setns-dcache TEST_GEN_PROGS += thread-self include ../lib.mk diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c new file mode 100644 index 000000000000..60ab197a73fc --- /dev/null +++ b/tools/testing/selftests/proc/setns-dcache.c @@ -0,0 +1,129 @@ +/* + * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that setns(CLONE_NEWNET) points to new /proc/net content even + * if old one is in dcache. + * + * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/socket.h> + +static pid_t pid = -1; + +static void f(void) +{ + if (pid > 0) { + kill(pid, SIGTERM); + } +} + +int main(void) +{ + int fd[2]; + char _ = 0; + int nsfd; + + atexit(f); + + /* Check for priviledges and syscall availability straight away. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + /* Distinguisher between two otherwise empty net namespaces. */ + if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) { + return 1; + } + + if (pipe(fd) == -1) { + return 1; + } + + pid = fork(); + if (pid == -1) { + return 1; + } + + if (pid == 0) { + if (unshare(CLONE_NEWNET) == -1) { + return 1; + } + + if (write(fd[1], &_, 1) != 1) { + return 1; + } + + pause(); + + return 0; + } + + if (read(fd[0], &_, 1) != 1) { + return 1; + } + + { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid); + nsfd = open(buf, O_RDONLY); + if (nsfd == -1) { + return 1; + } + } + + /* Reliably pin dentry into dcache. */ + (void)open("/proc/net/unix", O_RDONLY); + + if (setns(nsfd, CLONE_NEWNET) == -1) { + return 1; + } + + kill(pid, SIGTERM); + pid = 0; + + { + char buf[4096]; + ssize_t rv; + int fd; + + fd = open("/proc/net/unix", O_RDONLY); + if (fd == -1) { + return 1; + } + +#define S "Num RefCount Protocol Flags Type St Inode Path\n" + rv = read(fd, buf, sizeof(buf)); + + assert(rv == strlen(S)); + assert(memcmp(buf, S, strlen(S)) == 0); + } + + return 0; +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 496a9a8c773a..7e632b465ab4 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1608,7 +1608,16 @@ TEST_F(TRACE_poke, getpid_runs_normally) #ifdef SYSCALL_NUM_RET_SHARE_REG # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else -# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +# define EXPECT_SYSCALL_RETURN(val, action) \ + do { \ + errno = 0; \ + if (val < 0) { \ + EXPECT_EQ(-1, action); \ + EXPECT_EQ(-(val), errno); \ + } else { \ + EXPECT_EQ(val, action); \ + } \ + } while (0) #endif /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for @@ -1647,7 +1656,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) /* Architecture-specific syscall changing routine. */ void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall) + pid_t tracee, int syscall, int result) { int ret; ARCH_REGS regs; @@ -1706,7 +1715,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = EPERM; + regs.SYSCALL_RET = result; #endif #ifdef HAVE_GETREGS @@ -1734,14 +1743,19 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); break; case 0x1003: - /* skip gettid. */ + /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, 45000); break; case 0x1004: + /* skip openat with error. */ + EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, -1, -ESRCH); + break; + case 0x1005: /* do nothing (allow getppid) */ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); break; @@ -1774,9 +1788,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, nr = get_syscall(_metadata, tracee); if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); + if (nr == __NR_gettid) + change_syscall(_metadata, tracee, -1, 45000); if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, -ESRCH); } FIXTURE_DATA(TRACE_syscall) { @@ -1793,8 +1809,10 @@ FIXTURE_SETUP(TRACE_syscall) BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -1842,15 +1860,26 @@ TEST_F(TRACE_syscall, ptrace_syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, ptrace_syscall_dropped) +TEST_F(TRACE_syscall, ptrace_syscall_errno) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the open syscall, resulting in ESRCH. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_faked) { /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ teardown_trace_fixture(_metadata, self->tracer); self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, true); - /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); + /* Tracer should skip the gettid syscall, resulting fake pid. */ + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, syscall_allowed) @@ -1883,7 +1912,21 @@ TEST_F(TRACE_syscall, syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, syscall_dropped) +TEST_F(TRACE_syscall, syscall_errno) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* openat has been skipped and an errno return. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, syscall_faked) { long ret; @@ -1894,8 +1937,7 @@ TEST_F(TRACE_syscall, syscall_dropped) ASSERT_EQ(0, ret); /* gettid has been skipped and an altered return value stored. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); - EXPECT_NE(self->mytid, syscall(__NR_gettid)); + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, skip_after_RET_TRACE) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index c02683cfb6c9..7656c7ce79d9 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges |