summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-12-12 20:27:41 +0100
committerJakub Kicinski <kuba@kernel.org>2022-12-12 20:27:42 +0100
commit26f708a28454df2062a63fd869e983c379f50ff0 (patch)
treee9580092e7d69af3f9d5add0cd331bad2a6bf708
parentMerge tag 'linux-can-next-for-6.2-20221212' of git://git.kernel.org/pub/scm/l... (diff)
parentMerge branch 'stricter register ID checking in regsafe()' (diff)
downloadlinux-26f708a28454df2062a63fd869e983c379f50ff0.tar.xz
linux-26f708a28454df2062a63fd869e983c379f50ff0.zip
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2022-12-11 We've added 74 non-merge commits during the last 11 day(s) which contain a total of 88 files changed, 3362 insertions(+), 789 deletions(-). The main changes are: 1) Decouple prune and jump points handling in the verifier, from Andrii. 2) Do not rely on ALLOW_ERROR_INJECTION for fmod_ret, from Benjamin. Merged from hid tree. 3) Do not zero-extend kfunc return values. Necessary fix for 32-bit archs, from Björn. 4) Don't use rcu_users to refcount in task kfuncs, from David. 5) Three reg_state->id fixes in the verifier, from Eduard. 6) Optimize bpf_mem_alloc by reusing elements from free_by_rcu, from Hou. 7) Refactor dynptr handling in the verifier, from Kumar. 8) Remove the "/sys" mount and umount dance in {open,close}_netns in bpf selftests, from Martin. 9) Enable sleepable support for cgrp local storage, from Yonghong. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (74 commits) selftests/bpf: test case for relaxed prunning of active_lock.id selftests/bpf: Add pruning test case for bpf_spin_lock bpf: use check_ids() for active_lock comparison selftests/bpf: verify states_equal() maintains idmap across all frames bpf: states_equal() must build idmap for all function frames selftests/bpf: test cases for regsafe() bug skipping check_id() bpf: regsafe() must not skip check_ids() docs/bpf: Add documentation for BPF_MAP_TYPE_SK_STORAGE selftests/bpf: Add test for dynptr reinit in user_ringbuf callback bpf: Use memmove for bpf_dynptr_{read,write} bpf: Move PTR_TO_STACK alignment check to process_dynptr_func bpf: Rework check_func_arg_reg_off bpf: Rework process_dynptr_func bpf: Propagate errors from process_* checks in check_func_arg bpf: Refactor ARG_PTR_TO_DYNPTR checks into process_dynptr_func bpf: Skip rcu_barrier() if rcu_trace_implies_rcu_gp() is true bpf: Reuse freed element in free_by_rcu during allocation selftests/bpf: Bring test_offload.py back to life bpf: Fix comment error in fixup_kfunc_call function bpf: Do not zero-extend kfunc return values ... ==================== Link: https://lore.kernel.org/r/20221212024701.73809-1-alexei.starovoitov@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--Documentation/bpf/bpf_iterators.rst485
-rw-r--r--Documentation/bpf/index.rst1
-rw-r--r--Documentation/bpf/instruction-set.rst4
-rw-r--r--Documentation/bpf/kfuncs.rst207
-rw-r--r--Documentation/bpf/map_sk_storage.rst155
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c29
-rw-r--r--include/linux/bpf.h9
-rw-r--r--include/linux/bpf_lsm.h6
-rw-r--r--include/linux/bpf_verifier.h16
-rw-r--r--include/linux/btf.h3
-rw-r--r--include/linux/btf_ids.h1
-rw-r--r--include/linux/skmsg.h1
-rw-r--r--include/net/dst_metadata.h1
-rw-r--r--include/net/netns/xdp.h2
-rw-r--r--include/net/tcp.h4
-rw-r--r--include/net/xfrm.h17
-rw-r--r--include/uapi/linux/bpf.h8
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c3
-rw-r--r--kernel/bpf/bpf_inode_storage.c4
-rw-r--r--kernel/bpf/bpf_lsm.c16
-rw-r--r--kernel/bpf/bpf_task_storage.c4
-rw-r--r--kernel/bpf/btf.c32
-rw-r--r--kernel/bpf/helpers.c118
-rw-r--r--kernel/bpf/memalloc.c31
-rw-r--r--kernel/bpf/verifier.c696
-rw-r--r--net/bpf/test_run.c14
-rw-r--r--net/core/bpf_sk_storage.c3
-rw-r--r--net/core/dst.c8
-rw-r--r--net/core/filter.c23
-rw-r--r--net/core/skmsg.c9
-rw-r--r--net/core/sock_map.c2
-rw-r--r--net/ipv4/tcp_bpf.c19
-rw-r--r--net/tls/tls_sw.c6
-rw-r--r--net/xfrm/Makefile8
-rw-r--r--net/xfrm/xfrm_interface_bpf.c115
-rw-r--r--net/xfrm/xfrm_interface_core.c (renamed from net/xfrm/xfrm_interface.c)14
-rwxr-xr-xscripts/bpf_doc.py1
-rw-r--r--tools/bpf/bpftool/common.c1
-rw-r--r--tools/include/uapi/linux/bpf.h8
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/lib/bpf/Makefile17
-rw-r--r--tools/lib/bpf/bpf.h7
-rw-r--r--tools/lib/bpf/usdt.c8
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch641
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x2
-rw-r--r--tools/testing/selftests/bpf/Makefile8
-rw-r--r--tools/testing/selftests/bpf/bpf_legacy.h19
-rw-r--r--tools/testing/selftests/bpf/config6
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c51
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c94
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_kfunc.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c314
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xfrm_info.c362
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ksym.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h5
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c1
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c80
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c31
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c1
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.c17
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c27
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c60
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_failure.c11
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_success.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c12
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_fail.c51
-rw-r--r--tools/testing/selftests/bpf/progs/xfrm_info.c40
-rw-r--r--tools/testing/selftests/bpf/test_cpp.cpp13
-rw-r--r--tools/testing/selftests/bpf/test_loader.c233
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py8
-rw-r--r--tools/testing/selftests/bpf/test_progs.h33
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c18
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c84
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_packet_access.c54
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/ringbuf.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/spin_lock.c114
-rw-r--r--tools/testing/selftests/bpf/verifier/value_or_null.c49
88 files changed, 3362 insertions, 789 deletions
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
new file mode 100644
index 000000000000..6d7770793fab
--- /dev/null
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -0,0 +1,485 @@
+=============
+BPF Iterators
+=============
+
+
+----------
+Motivation
+----------
+
+There are a few existing ways to dump kernel data into user space. The most
+popular one is the ``/proc`` system. For example, ``cat /proc/net/tcp6`` dumps
+all tcp6 sockets in the system, and ``cat /proc/net/netlink`` dumps all netlink
+sockets in the system. However, their output format tends to be fixed, and if
+users want more information about these sockets, they have to patch the kernel,
+which often takes time to publish upstream and release. The same is true for popular
+tools like `ss <https://man7.org/linux/man-pages/man8/ss.8.html>`_ where any
+additional information needs a kernel patch.
+
+To solve this problem, the `drgn
+<https://www.kernel.org/doc/html/latest/bpf/drgn.html>`_ tool is often used to
+dig out the kernel data with no kernel change. However, the main drawback for
+drgn is performance, as it cannot do pointer tracing inside the kernel. In
+addition, drgn cannot validate a pointer value and may read invalid data if the
+pointer becomes invalid inside the kernel.
+
+The BPF iterator solves the above problem by providing flexibility on what data
+(e.g., tasks, bpf_maps, etc.) to collect by calling BPF programs for each kernel
+data object.
+
+----------------------
+How BPF Iterators Work
+----------------------
+
+A BPF iterator is a type of BPF program that allows users to iterate over
+specific types of kernel objects. Unlike traditional BPF tracing programs that
+allow users to define callbacks that are invoked at particular points of
+execution in the kernel, BPF iterators allow users to define callbacks that
+should be executed for every entry in a variety of kernel data structures.
+
+For example, users can define a BPF iterator that iterates over every task on
+the system and dumps the total amount of CPU runtime currently used by each of
+them. Another BPF task iterator may instead dump the cgroup information for each
+task. Such flexibility is the core value of BPF iterators.
+
+A BPF program is always loaded into the kernel at the behest of a user space
+process. A user space process loads a BPF program by opening and initializing
+the program skeleton as required and then invoking a syscall to have the BPF
+program verified and loaded by the kernel.
+
+In traditional tracing programs, a program is activated by having user space
+obtain a ``bpf_link`` to the program with ``bpf_program__attach()``. Once
+activated, the program callback will be invoked whenever the tracepoint is
+triggered in the main kernel. For BPF iterator programs, a ``bpf_link`` to the
+program is obtained using ``bpf_link_create()``, and the program callback is
+invoked by issuing system calls from user space.
+
+Next, let us see how you can use the iterators to iterate on kernel objects and
+read data.
+
+------------------------
+How to Use BPF iterators
+------------------------
+
+BPF selftests are a great resource to illustrate how to use the iterators. In
+this section, we’ll walk through a BPF selftest which shows how to load and use
+a BPF iterator program. To begin, we’ll look at `bpf_iter.c
+<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/prog_tests/bpf_iter.c>`_,
+which illustrates how to load and trigger BPF iterators on the user space side.
+Later, we’ll look at a BPF program that runs in kernel space.
+
+Loading a BPF iterator in the kernel from user space typically involves the
+following steps:
+
+* The BPF program is loaded into the kernel through ``libbpf``. Once the kernel
+ has verified and loaded the program, it returns a file descriptor (fd) to user
+ space.
+* Obtain a ``link_fd`` to the BPF program by calling the ``bpf_link_create()``
+ specified with the BPF program file descriptor received from the kernel.
+* Next, obtain a BPF iterator file descriptor (``bpf_iter_fd``) by calling the
+ ``bpf_iter_create()`` specified with the ``bpf_link`` received from Step 2.
+* Trigger the iteration by calling ``read(bpf_iter_fd)`` until no data is
+ available.
+* Close the iterator fd using ``close(bpf_iter_fd)``.
+* If needed to reread the data, get a new ``bpf_iter_fd`` and do the read again.
+
+The following are a few examples of selftest BPF iterator programs:
+
+* `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_
+* `bpf_iter_task_vma.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c>`_
+* `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_
+
+Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
+
+Here is the definition of ``bpf_iter__task_file`` in `vmlinux.h
+<https://facebookmicrosites.github.io/bpf/blog/2020/02/19/bpf-portability-and-co-re.html#btf>`_.
+Any struct name in ``vmlinux.h`` in the format ``bpf_iter__<iter_name>``
+represents a BPF iterator. The suffix ``<iter_name>`` represents the type of
+iterator.
+
+::
+
+ struct bpf_iter__task_file {
+ union {
+ struct bpf_iter_meta *meta;
+ };
+ union {
+ struct task_struct *task;
+ };
+ u32 fd;
+ union {
+ struct file *file;
+ };
+ };
+
+In the above code, the field 'meta' contains the metadata, which is the same for
+all BPF iterator programs. The rest of the fields are specific to different
+iterators. For example, for task_file iterators, the kernel layer provides the
+'task', 'fd' and 'file' field values. The 'task' and 'file' are `reference
+counted
+<https://facebookmicrosites.github.io/bpf/blog/2018/08/31/object-lifetime.html#file-descriptors-and-reference-counters>`_,
+so they won't go away when the BPF program runs.
+
+Here is a snippet from the ``bpf_iter_task_file.c`` file:
+
+::
+
+ SEC("iter/task_file")
+ int dump_task_file(struct bpf_iter__task_file *ctx)
+ {
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ struct file *file = ctx->file;
+ __u32 fd = ctx->fd;
+
+ if (task == NULL || file == NULL)
+ return 0;
+
+ if (ctx->meta->seq_num == 0) {
+ count = 0;
+ BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+ }
+
+ if (tgid == task->tgid && task->tgid != task->pid)
+ count++;
+
+ if (last_tgid != task->tgid) {
+ last_tgid = task->tgid;
+ unique_tgid_count++;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+ }
+
+In the above example, the section name ``SEC(iter/task_file)``, indicates that
+the program is a BPF iterator program to iterate all files from all tasks. The
+context of the program is ``bpf_iter__task_file`` struct.
+
+The user space program invokes the BPF iterator program running in the kernel
+by issuing a ``read()`` syscall. Once invoked, the BPF
+program can export data to user space using a variety of BPF helper functions.
+You can use either ``bpf_seq_printf()`` (and BPF_SEQ_PRINTF helper macro) or
+``bpf_seq_write()`` function based on whether you need formatted output or just
+binary data, respectively. For binary-encoded data, the user space applications
+can process the data from ``bpf_seq_write()`` as needed. For the formatted data,
+you can use ``cat <path>`` to print the results similar to ``cat
+/proc/net/netlink`` after pinning the BPF iterator to the bpffs mount. Later,
+use ``rm -f <path>`` to remove the pinned iterator.
+
+For example, you can use the following command to create a BPF iterator from the
+``bpf_iter_ipv6_route.o`` object file and pin it to the ``/sys/fs/bpf/my_route``
+path:
+
+::
+
+ $ bpftool iter pin ./bpf_iter_ipv6_route.o /sys/fs/bpf/my_route
+
+And then print out the results using the following command:
+
+::
+
+ $ cat /sys/fs/bpf/my_route
+
+
+-------------------------------------------------------
+Implement Kernel Support for BPF Iterator Program Types
+-------------------------------------------------------
+
+To implement a BPF iterator in the kernel, the developer must make a one-time
+change to the following key data structure defined in the `bpf.h
+<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/include/linux/bpf.h>`_
+file.
+
+::
+
+ struct bpf_iter_reg {
+ const char *target;
+ bpf_iter_attach_target_t attach_target;
+ bpf_iter_detach_target_t detach_target;
+ bpf_iter_show_fdinfo_t show_fdinfo;
+ bpf_iter_fill_link_info_t fill_link_info;
+ bpf_iter_get_func_proto_t get_func_proto;
+ u32 ctx_arg_info_size;
+ u32 feature;
+ struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
+ const struct bpf_iter_seq_info *seq_info;
+ };
+
+After filling the data structure fields, call ``bpf_iter_reg_target()`` to
+register the iterator to the main BPF iterator subsystem.
+
+The following is the breakdown for each field in struct ``bpf_iter_reg``.
+
+.. list-table::
+ :widths: 25 50
+ :header-rows: 1
+
+ * - Fields
+ - Description
+ * - target
+ - Specifies the name of the BPF iterator. For example: ``bpf_map``,
+ ``bpf_map_elem``. The name should be different from other ``bpf_iter`` target names in the kernel.
+ * - attach_target and detach_target
+ - Allows for target specific ``link_create`` action since some targets
+ may need special processing. Called during the user space link_create stage.
+ * - show_fdinfo and fill_link_info
+ - Called to fill target specific information when user tries to get link
+ info associated with the iterator.
+ * - get_func_proto
+ - Permits a BPF iterator to access BPF helpers specific to the iterator.
+ * - ctx_arg_info_size and ctx_arg_info
+ - Specifies the verifier states for BPF program arguments associated with
+ the bpf iterator.
+ * - feature
+ - Specifies certain action requests in the kernel BPF iterator
+ infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
+ that the kernel function cond_resched() is called to avoid other kernel
+ subsystem (e.g., rcu) misbehaving.
+ * - seq_info
+ - Specifies certain action requests in the kernel BPF iterator
+ infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
+ that the kernel function cond_resched() is called to avoid other kernel
+ subsystem (e.g., rcu) misbehaving.
+
+
+`Click here
+<https://lore.kernel.org/bpf/20210212183107.50963-2-songliubraving@fb.com/>`_
+to see an implementation of the ``task_vma`` BPF iterator in the kernel.
+
+---------------------------------
+Parameterizing BPF Task Iterators
+---------------------------------
+
+By default, BPF iterators walk through all the objects of the specified types
+(processes, cgroups, maps, etc.) across the entire system to read relevant
+kernel data. But often, there are cases where we only care about a much smaller
+subset of iterable kernel objects, such as only iterating tasks within a
+specific process. Therefore, BPF iterator programs support filtering out objects
+from iteration by allowing user space to configure the iterator program when it
+is attached.
+
+--------------------------
+BPF Task Iterator Program
+--------------------------
+
+The following code is a BPF iterator program to print files and task information
+through the ``seq_file`` of the iterator. It is a standard BPF iterator program
+that visits every file of an iterator. We will use this BPF program in our
+example later.
+
+::
+
+ #include <vmlinux.h>
+ #include <bpf/bpf_helpers.h>
+
+ char _license[] SEC("license") = "GPL";
+
+ SEC("iter/task_file")
+ int dump_task_file(struct bpf_iter__task_file *ctx)
+ {
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ struct file *file = ctx->file;
+ __u32 fd = ctx->fd;
+ if (task == NULL || file == NULL)
+ return 0;
+ if (ctx->meta->seq_num == 0) {
+ BPF_SEQ_PRINTF(seq, " tgid pid fd file\n");
+ }
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+ }
+
+----------------------------------------
+Creating a File Iterator with Parameters
+----------------------------------------
+
+Now, let us look at how to create an iterator that includes only files of a
+process.
+
+First, fill the ``bpf_iter_attach_opts`` struct as shown below:
+
+::
+
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+``linfo.task.pid``, if it is non-zero, directs the kernel to create an iterator
+that only includes opened files for the process with the specified ``pid``. In
+this example, we will only be iterating files for our process. If
+``linfo.task.pid`` is zero, the iterator will visit every opened file of every
+process. Similarly, ``linfo.task.tid`` directs the kernel to create an iterator
+that visits opened files of a specific thread, not a process. In this example,
+``linfo.task.tid`` is different from ``linfo.task.pid`` only if the thread has a
+separate file descriptor table. In most circumstances, all process threads share
+a single file descriptor table.
+
+Now, in the userspace program, pass the pointer of struct to the
+``bpf_program__attach_iter()``.
+
+::
+
+ link = bpf_program__attach_iter(prog, &opts); iter_fd =
+ bpf_iter_create(bpf_link__fd(link));
+
+If both *tid* and *pid* are zero, an iterator created from this struct
+``bpf_iter_attach_opts`` will include every opened file of every task in the
+system (in the namespace, actually.) It is the same as passing a NULL as the
+second argument to ``bpf_program__attach_iter()``.
+
+The whole program looks like the following code:
+
+::
+
+ #include <stdio.h>
+ #include <unistd.h>
+ #include <bpf/bpf.h>
+ #include <bpf/libbpf.h>
+ #include "bpf_iter_task_ex.skel.h"
+
+ static int do_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
+ {
+ struct bpf_link *link;
+ char buf[16] = {};
+ int iter_fd = -1, len;
+ int ret = 0;
+
+ link = bpf_program__attach_iter(prog, opts);
+ if (!link) {
+ fprintf(stderr, "bpf_program__attach_iter() fails\n");
+ return -1;
+ }
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (iter_fd < 0) {
+ fprintf(stderr, "bpf_iter_create() fails\n");
+ ret = -1;
+ goto free_link;
+ }
+ /* not check contents, but ensure read() ends without error */
+ while ((len = read(iter_fd, buf, sizeof(buf) - 1)) > 0) {
+ buf[len] = 0;
+ printf("%s", buf);
+ }
+ printf("\n");
+ free_link:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ return 0;
+ }
+
+ static void test_task_file(void)
+ {
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_task_ex *skel;
+ union bpf_iter_link_info linfo;
+ skel = bpf_iter_task_ex__open_and_load();
+ if (skel == NULL)
+ return;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ printf("PID %d\n", getpid());
+ do_read_opts(skel->progs.dump_task_file, &opts);
+ bpf_iter_task_ex__destroy(skel);
+ }
+
+ int main(int argc, const char * const * argv)
+ {
+ test_task_file();
+ return 0;
+ }
+
+The following lines are the output of the program.
+::
+
+ PID 1859
+
+ tgid pid fd file
+ 1859 1859 0 ffffffff82270aa0
+ 1859 1859 1 ffffffff82270aa0
+ 1859 1859 2 ffffffff82270aa0
+ 1859 1859 3 ffffffff82272980
+ 1859 1859 4 ffffffff8225e120
+ 1859 1859 5 ffffffff82255120
+ 1859 1859 6 ffffffff82254f00
+ 1859 1859 7 ffffffff82254d80
+ 1859 1859 8 ffffffff8225abe0
+
+------------------
+Without Parameters
+------------------
+
+Let us look at how a BPF iterator without parameters skips files of other
+processes in the system. In this case, the BPF program has to check the pid or
+the tid of tasks, or it will receive every opened file in the system (in the
+current *pid* namespace, actually). So, we usually add a global variable in the
+BPF program to pass a *pid* to the BPF program.
+
+The BPF program would look like the following block.
+
+ ::
+
+ ......
+ int target_pid = 0;
+
+ SEC("iter/task_file")
+ int dump_task_file(struct bpf_iter__task_file *ctx)
+ {
+ ......
+ if (task->tgid != target_pid) /* Check task->pid instead to check thread IDs */
+ return 0;
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+ }
+
+The user space program would look like the following block:
+
+ ::
+
+ ......
+ static void test_task_file(void)
+ {
+ ......
+ skel = bpf_iter_task_ex__open_and_load();
+ if (skel == NULL)
+ return;
+ skel->bss->target_pid = getpid(); /* process ID. For thread id, use gettid() */
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ ......
+ }
+
+``target_pid`` is a global variable in the BPF program. The user space program
+should initialize the variable with a process ID to skip opened files of other
+processes in the BPF program. When you parametrize a BPF iterator, the iterator
+calls the BPF program fewer times which can save significant resources.
+
+---------------------------
+Parametrizing VMA Iterators
+---------------------------
+
+By default, a BPF VMA iterator includes every VMA in every process. However,
+you can still specify a process or a thread to include only its VMAs. Unlike
+files, a thread can not have a separate address space (since Linux 2.6.0-test6).
+Here, using *tid* makes no difference from using *pid*.
+
+----------------------------
+Parametrizing Task Iterators
+----------------------------
+
+A BPF task iterator with *pid* includes all tasks (threads) of a process. The
+BPF program receives these tasks one after another. You can specify a BPF task
+iterator with *tid* parameter to include only the tasks that match the given
+*tid*.
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 1088d44634d6..b81533d8b061 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -24,6 +24,7 @@ that goes into great technical depth about the BPF Architecture.
maps
bpf_prog_run
classic_vs_extended.rst
+ bpf_iterators
bpf_licensing
test_debug
clang-notes
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 5d798437dad4..e672d5ec6cc7 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -122,11 +122,11 @@ BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below)
``BPF_XOR | BPF_K | BPF_ALU`` means::
- src_reg = (u32) src_reg ^ (u32) imm32
+ dst_reg = (u32) dst_reg ^ (u32) imm32
``BPF_XOR | BPF_K | BPF_ALU64`` means::
- src_reg = src_reg ^ imm32
+ dst_reg = dst_reg ^ imm32
Byte swap instructions
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 90774479ab7a..9fd7fb539f85 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -191,6 +191,15 @@ rebooting or panicking. Due to this additional restrictions apply to these
calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
added later.
+2.4.8 KF_RCU flag
+-----------------
+
+The KF_RCU flag is used for kfuncs which have a rcu ptr as its argument.
+When used together with KF_ACQUIRE, it indicates the kfunc should have a
+single argument which must be a trusted argument or a MEM_RCU pointer.
+The argument may have reference count of 0 and the kfunc must take this
+into consideration.
+
2.5 Registering the kfuncs
--------------------------
@@ -213,3 +222,201 @@ type. An example is shown below::
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_task_kfunc_set);
}
late_initcall(init_subsystem);
+
+3. Core kfuncs
+==============
+
+The BPF subsystem provides a number of "core" kfuncs that are potentially
+applicable to a wide variety of different possible use cases and programs.
+Those kfuncs are documented here.
+
+3.1 struct task_struct * kfuncs
+-------------------------------
+
+There are a number of kfuncs that allow ``struct task_struct *`` objects to be
+used as kptrs:
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_task_acquire bpf_task_release
+
+These kfuncs are useful when you want to acquire or release a reference to a
+``struct task_struct *`` that was passed as e.g. a tracepoint arg, or a
+struct_ops callback arg. For example:
+
+.. code-block:: c
+
+ /**
+ * A trivial example tracepoint program that shows how to
+ * acquire and release a struct task_struct * pointer.
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(task_acquire_release_example, struct task_struct *task, u64 clone_flags)
+ {
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+
+ /*
+ * In a typical program you'd do something like store
+ * the task in a map, and the map will automatically
+ * release it later. Here, we release it manually.
+ */
+ bpf_task_release(acquired);
+ return 0;
+ }
+
+----
+
+A BPF program can also look up a task from a pid. This can be useful if the
+caller doesn't have a trusted pointer to a ``struct task_struct *`` object that
+it can acquire a reference on with bpf_task_acquire().
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_task_from_pid
+
+Here is an example of it being used:
+
+.. code-block:: c
+
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(task_get_pid_example, struct task_struct *task, u64 clone_flags)
+ {
+ struct task_struct *lookup;
+
+ lookup = bpf_task_from_pid(task->pid);
+ if (!lookup)
+ /* A task should always be found, as %task is a tracepoint arg. */
+ return -ENOENT;
+
+ if (lookup->pid != task->pid) {
+ /* bpf_task_from_pid() looks up the task via its
+ * globally-unique pid from the init_pid_ns. Thus,
+ * the pid of the lookup task should always be the
+ * same as the input task.
+ */
+ bpf_task_release(lookup);
+ return -EINVAL;
+ }
+
+ /* bpf_task_from_pid() returns an acquired reference,
+ * so it must be dropped before returning from the
+ * tracepoint handler.
+ */
+ bpf_task_release(lookup);
+ return 0;
+ }
+
+3.2 struct cgroup * kfuncs
+--------------------------
+
+``struct cgroup *`` objects also have acquire and release functions:
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_acquire bpf_cgroup_release
+
+These kfuncs are used in exactly the same manner as bpf_task_acquire() and
+bpf_task_release() respectively, so we won't provide examples for them.
+
+----
+
+You may also acquire a reference to a ``struct cgroup`` kptr that's already
+stored in a map using bpf_cgroup_kptr_get():
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_kptr_get
+
+Here's an example of how it can be used:
+
+.. code-block:: c
+
+ /* struct containing the struct task_struct kptr which is actually stored in the map. */
+ struct __cgroups_kfunc_map_value {
+ struct cgroup __kptr_ref * cgroup;
+ };
+
+ /* The map containing struct __cgroups_kfunc_map_value entries. */
+ struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __cgroups_kfunc_map_value);
+ __uint(max_entries, 1);
+ } __cgroups_kfunc_map SEC(".maps");
+
+ /* ... */
+
+ /**
+ * A simple example tracepoint program showing how a
+ * struct cgroup kptr that is stored in a map can
+ * be acquired using the bpf_cgroup_kptr_get() kfunc.
+ */
+ SEC("tp_btf/cgroup_mkdir")
+ int BPF_PROG(cgroup_kptr_get_example, struct cgroup *cgrp, const char *path)
+ {
+ struct cgroup *kptr;
+ struct __cgroups_kfunc_map_value *v;
+ s32 id = cgrp->self.id;
+
+ /* Assume a cgroup kptr was previously stored in the map. */
+ v = bpf_map_lookup_elem(&__cgroups_kfunc_map, &id);
+ if (!v)
+ return -ENOENT;
+
+ /* Acquire a reference to the cgroup kptr that's already stored in the map. */
+ kptr = bpf_cgroup_kptr_get(&v->cgroup);
+ if (!kptr)
+ /* If no cgroup was present in the map, it's because
+ * we're racing with another CPU that removed it with
+ * bpf_kptr_xchg() between the bpf_map_lookup_elem()
+ * above, and our call to bpf_cgroup_kptr_get().
+ * bpf_cgroup_kptr_get() internally safely handles this
+ * race, and will return NULL if the task is no longer
+ * present in the map by the time we invoke the kfunc.
+ */
+ return -EBUSY;
+
+ /* Free the reference we just took above. Note that the
+ * original struct cgroup kptr is still in the map. It will
+ * be freed either at a later time if another context deletes
+ * it from the map, or automatically by the BPF subsystem if
+ * it's still present when the map is destroyed.
+ */
+ bpf_cgroup_release(kptr);
+
+ return 0;
+ }
+
+----
+
+Another kfunc available for interacting with ``struct cgroup *`` objects is
+bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup,
+and return it as a cgroup kptr.
+
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_ancestor
+
+Eventually, BPF should be updated to allow this to happen with a normal memory
+load in the program itself. This is currently not possible without more work in
+the verifier. bpf_cgroup_ancestor() can be used as follows:
+
+.. code-block:: c
+
+ /**
+ * Simple tracepoint example that illustrates how a cgroup's
+ * ancestor can be accessed using bpf_cgroup_ancestor().
+ */
+ SEC("tp_btf/cgroup_mkdir")
+ int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
+ {
+ struct cgroup *parent;
+
+ /* The parent cgroup resides at the level before the current cgroup's level. */
+ parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!parent)
+ return -ENOENT;
+
+ bpf_printk("Parent id is %d", parent->self.id);
+
+ /* Return the parent cgroup that was acquired above. */
+ bpf_cgroup_release(parent);
+ return 0;
+ }
diff --git a/Documentation/bpf/map_sk_storage.rst b/Documentation/bpf/map_sk_storage.rst
new file mode 100644
index 000000000000..047e16c8aaa8
--- /dev/null
+++ b/Documentation/bpf/map_sk_storage.rst
@@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=======================
+BPF_MAP_TYPE_SK_STORAGE
+=======================
+
+.. note::
+ - ``BPF_MAP_TYPE_SK_STORAGE`` was introduced in kernel version 5.2
+
+``BPF_MAP_TYPE_SK_STORAGE`` is used to provide socket-local storage for BPF
+programs. A map of type ``BPF_MAP_TYPE_SK_STORAGE`` declares the type of storage
+to be provided and acts as the handle for accessing the socket-local
+storage. The values for maps of type ``BPF_MAP_TYPE_SK_STORAGE`` are stored
+locally with each socket instead of with the map. The kernel is responsible for
+allocating storage for a socket when requested and for freeing the storage when
+either the map or the socket is deleted.
+
+.. note::
+ - The key type must be ``int`` and ``max_entries`` must be set to ``0``.
+ - The ``BPF_F_NO_PREALLOC`` flag must be used when creating a map for
+ socket-local storage.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_sk_storage_get()
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
+
+Socket-local storage can be retrieved using the ``bpf_sk_storage_get()``
+helper. The helper gets the storage from ``sk`` that is associated with ``map``.
+If the ``BPF_LOCAL_STORAGE_GET_F_CREATE`` flag is used then
+``bpf_sk_storage_get()`` will create the storage for ``sk`` if it does not
+already exist. ``value`` can be used together with
+``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise it
+will be zero initialized. Returns a pointer to the storage on success, or
+``NULL`` in case of failure.
+
+.. note::
+ - ``sk`` is a kernel ``struct sock`` pointer for LSM or tracing programs.
+ - ``sk`` is a ``struct bpf_sock`` pointer for other program types.
+
+bpf_sk_storage_delete()
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
+
+Socket-local storage can be deleted using the ``bpf_sk_storage_delete()``
+helper. The helper deletes the storage from ``sk`` that is identified by
+``map``. Returns ``0`` on success, or negative error in case of failure.
+
+User space
+----------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_update_elem(int map_fd, const void *key, const void *value, __u64 flags)
+
+Socket-local storage for the socket identified by ``key`` belonging to
+``map_fd`` can be added or updated using the ``bpf_map_update_elem()`` libbpf
+function. ``key`` must be a pointer to a valid ``fd`` in the user space
+program. The ``flags`` parameter can be used to control the update behaviour:
+
+- ``BPF_ANY`` will create storage for ``fd`` or update existing storage.
+- ``BPF_NOEXIST`` will create storage for ``fd`` only if it did not already
+ exist, otherwise the call will fail with ``-EEXIST``.
+- ``BPF_EXIST`` will update existing storage for ``fd`` if it already exists,
+ otherwise the call will fail with ``-ENOENT``.
+
+Returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_lookup_elem(int map_fd, const void *key, void *value)
+
+Socket-local storage for the socket identified by ``key`` belonging to
+``map_fd`` can be retrieved using the ``bpf_map_lookup_elem()`` libbpf
+function. ``key`` must be a pointer to a valid ``fd`` in the user space
+program. Returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+ int bpf_map_delete_elem(int map_fd, const void *key)
+
+Socket-local storage for the socket identified by ``key`` belonging to
+``map_fd`` can be deleted using the ``bpf_map_delete_elem()`` libbpf
+function. Returns ``0`` on success, or negative error in case of failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare socket-local storage in a BPF program:
+
+.. code-block:: c
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct my_storage);
+ } socket_storage SEC(".maps");
+
+This snippet shows how to retrieve socket-local storage in a BPF program:
+
+.. code-block:: c
+
+ SEC("sockops")
+ int _sockops(struct bpf_sock_ops *ctx)
+ {
+ struct my_storage *storage;
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ storage = bpf_sk_storage_get(&socket_storage, sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 1;
+
+ /* Use 'storage' here */
+
+ return 1;
+ }
+
+
+Please see the ``tools/testing/selftests/bpf`` directory for functional
+examples.
+
+References
+==========
+
+https://lwn.net/ml/netdev/20190426171103.61892-1-kafai@fb.com/
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 00df3a8f92ac..f2417ac54edd 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -136,6 +136,25 @@ static bool in_auipc_jalr_range(s64 val)
val < ((1L << 31) - (1L << 11));
}
+/* Emit fixed-length instructions for address */
+static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
+{
+ u64 ip = (u64)(ctx->insns + ctx->ninsns);
+ s64 off = addr - ip;
+ s64 upper = (off + (1 << 11)) >> 12;
+ s64 lower = off & 0xfff;
+
+ if (extra_pass && !in_auipc_jalr_range(off)) {
+ pr_err("bpf-jit: target offset 0x%llx is out of range\n", off);
+ return -ERANGE;
+ }
+
+ emit(rv_auipc(rd, upper), ctx);
+ emit(rv_addi(rd, rd, lower), ctx);
+ return 0;
+}
+
+/* Emit variable-length instructions for 32-bit and 64-bit imm */
static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
{
/* Note that the immediate from the add is sign-extended,
@@ -1050,7 +1069,15 @@ out_be:
u64 imm64;
imm64 = (u64)insn1.imm << 32 | (u32)imm;
- emit_imm(rd, imm64, ctx);
+ if (bpf_pseudo_func(insn)) {
+ /* fixed-length insns for extra jit pass */
+ ret = emit_addr(rd, imm64, extra_pass, ctx);
+ if (ret)
+ return ret;
+ } else {
+ emit_imm(rd, imm64, ctx);
+ }
+
return 1;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 67452103bb86..3de24cfb7a3d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -775,7 +775,7 @@ enum bpf_reg_type {
PTR_TO_MEM, /* reg points to valid memory region */
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_FUNC, /* reg points to a bpf program function */
- PTR_TO_DYNPTR, /* reg points to a dynptr */
+ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
__BPF_REG_TYPE_MAX,
/* Extended reg_types. */
@@ -1909,11 +1909,6 @@ static inline bool bpf_allow_uninit_stack(void)
return perfmon_capable();
}
-static inline bool bpf_allow_ptr_to_map_access(void)
-{
- return perfmon_capable();
-}
-
static inline bool bpf_bypass_spec_v1(void)
{
return perfmon_capable();
@@ -2833,7 +2828,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr);
+u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 4bcf76a9bb06..1de7ece5d36d 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -28,6 +28,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog);
bool bpf_lsm_is_sleepable_hook(u32 btf_id);
+bool bpf_lsm_is_trusted(const struct bpf_prog *prog);
static inline struct bpf_storage_blob *bpf_inode(
const struct inode *inode)
@@ -51,6 +52,11 @@ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
return false;
}
+static inline bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
+{
+ return false;
+}
+
static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
{
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c05aa6e1f6f5..53d175cbaa02 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -273,9 +273,9 @@ struct bpf_id_pair {
u32 cur;
};
-/* Maximum number of register states that can exist at once */
-#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
#define MAX_CALL_FRAMES 8
+/* Maximum number of register states that can exist at once */
+#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
struct bpf_verifier_state {
/* call stack tracking */
struct bpf_func_state *frame[MAX_CALL_FRAMES];
@@ -452,6 +452,7 @@ struct bpf_insn_aux_data {
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
bool prune_point;
+ bool jmp_point;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -531,7 +532,6 @@ struct bpf_verifier_env {
bool explore_alu_limits;
bool allow_ptr_leaks;
bool allow_uninit_stack;
- bool allow_ptr_to_map_access;
bool bpf_capable;
bool bypass_spec_v1;
bool bypass_spec_v4;
@@ -615,11 +615,9 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
-bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg);
-bool is_dynptr_type_expected(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- enum bpf_arg_type arg_type);
+struct bpf_call_arg_meta;
+int process_dynptr_func(struct bpf_verifier_env *env, int regno,
+ enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
@@ -683,7 +681,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
}
}
-#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
+#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
static inline bool bpf_type_has_unsafe_modifiers(u32 type)
{
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 9ed00077db6e..5f628f323442 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -70,6 +70,7 @@
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
+#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
/*
* Return the name of the passed struct, if exists, or halt the build if for
@@ -477,8 +478,10 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
u32 kfunc_btf_id);
+u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
const struct btf_kfunc_id_set *s);
+int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset);
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 93397711a68c..3a4f7cd882ca 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -266,5 +266,6 @@ MAX_BTF_TRACING_TYPE,
extern u32 btf_tracing_ids[];
extern u32 bpf_cgroup_btf_id[];
+extern u32 bpf_local_storage_map_btf_id[];
#endif
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 70d6cb94e580..84f787416a54 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -82,6 +82,7 @@ struct sk_psock {
u32 apply_bytes;
u32 cork_bytes;
u32 eval;
+ bool redir_ingress; /* undefined if sk_redir is null */
struct sk_msg *cork;
struct sk_psock_progs progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index a454cf4327fe..1b7fae4c6b24 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -26,6 +26,7 @@ struct macsec_info {
struct xfrm_md_info {
u32 if_id;
int link;
+ struct dst_entry *dst_orig;
};
struct metadata_dst {
diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h
index e5734261ba0a..21a4f25a187a 100644
--- a/include/net/netns/xdp.h
+++ b/include/net/netns/xdp.h
@@ -2,8 +2,8 @@
#ifndef __NETNS_XDP_H__
#define __NETNS_XDP_H__
-#include <linux/rculist.h>
#include <linux/mutex.h>
+#include <linux/types.h>
struct netns_xdp {
struct mutex lock;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f925377066fe..db9f828e9d1e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2323,8 +2323,8 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
- int flags);
+int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
+ struct sk_msg *msg, u32 bytes, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5413cdd5ad62..3e1f70e8e424 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2164,4 +2164,21 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
return false;
}
#endif
+
+#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern struct metadata_dst __percpu *xfrm_bpf_md_dst;
+
+int register_xfrm_interface_bpf(void);
+
+#else
+
+static inline int register_xfrm_interface_bpf(void)
+{
+ return 0;
+}
+
+#endif
+
#endif /* _NET_XFRM_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f89de51a45db..464ca3f01fe7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5293,7 +5293,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5303,7 +5303,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5313,7 +5313,7 @@ union bpf_attr {
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
*
- * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Get a pointer to the underlying dynptr data.
*
@@ -5414,7 +5414,7 @@ union bpf_attr {
* Drain samples from the specified user ring buffer, and invoke
* the provided callback for each such sample:
*
- * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
+ * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
*
* If **callback_fn** returns 0, the helper will continue to try
* and drain the next sample, up to a maximum of
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 309403800f82..6cdf6d9ed91d 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -211,7 +211,6 @@ BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgro
return ret;
}
-BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops cgrp_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -222,7 +221,7 @@ const struct bpf_map_ops cgrp_storage_map_ops = {
.map_update_elem = bpf_cgrp_storage_update_elem,
.map_delete_elem = bpf_cgrp_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &cgroup_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = cgroup_storage_ptr,
};
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 6a1d4d22816a..05f4c66c9089 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -213,8 +213,6 @@ static void inode_storage_map_free(struct bpf_map *map)
bpf_local_storage_map_free(map, &inode_cache, NULL);
}
-BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct,
- bpf_local_storage_map)
const struct bpf_map_ops inode_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -225,7 +223,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
.map_update_elem = bpf_fd_inode_storage_update_elem,
.map_delete_elem = bpf_fd_inode_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &inode_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = inode_storage_ptr,
};
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index ae0267f150b5..9ea42a45da47 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -345,11 +345,27 @@ BTF_ID(func, bpf_lsm_task_to_inode)
BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks)
+BTF_SET_START(untrusted_lsm_hooks)
+BTF_ID(func, bpf_lsm_bpf_map_free_security)
+BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
+BTF_ID(func, bpf_lsm_bpf_prog_free_security)
+BTF_ID(func, bpf_lsm_file_alloc_security)
+BTF_ID(func, bpf_lsm_file_free_security)
+BTF_ID(func, bpf_lsm_sk_alloc_security)
+BTF_ID(func, bpf_lsm_sk_free_security)
+BTF_ID(func, bpf_lsm_task_free)
+BTF_SET_END(untrusted_lsm_hooks)
+
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
{
return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
}
+bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
+{
+ return !btf_id_set_contains(&untrusted_lsm_hooks, prog->aux->attach_btf_id);
+}
+
const struct bpf_prog_ops lsm_prog_ops = {
};
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 8e832db8151a..1e486055a523 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -324,7 +324,7 @@ static void task_storage_map_free(struct bpf_map *map)
bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
}
-BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map)
+BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
const struct bpf_map_ops task_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -335,7 +335,7 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_update_elem = bpf_pid_task_storage_update_elem,
.map_delete_elem = bpf_pid_task_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &task_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = task_storage_ptr,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index d11cbf8cece7..f7dd8af06413 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -19,6 +19,7 @@
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
+#include <linux/bpf_lsm.h>
#include <linux/skmsg.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
@@ -205,6 +206,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_STRUCT_OPS,
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
+ BTF_KFUNC_HOOK_FMODRET,
BTF_KFUNC_HOOK_MAX,
};
@@ -5829,6 +5831,7 @@ static bool prog_args_trusted(const struct bpf_prog *prog)
case BPF_PROG_TYPE_TRACING:
return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER;
case BPF_PROG_TYPE_LSM:
+ return bpf_lsm_is_trusted(prog);
case BPF_PROG_TYPE_STRUCT_OPS:
return true;
default:
@@ -7606,11 +7609,14 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf,
return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
}
-/* This function must be invoked only from initcalls/module init functions */
-int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
- const struct btf_kfunc_id_set *kset)
+u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id)
+{
+ return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id);
+}
+
+static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
+ const struct btf_kfunc_id_set *kset)
{
- enum btf_kfunc_hook hook;
struct btf *btf;
int ret;
@@ -7629,13 +7635,29 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
if (IS_ERR(btf))
return PTR_ERR(btf);
- hook = bpf_prog_type_to_kfunc_hook(prog_type);
ret = btf_populate_kfunc_set(btf, hook, kset->set);
btf_put(btf);
return ret;
}
+
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *kset)
+{
+ enum btf_kfunc_hook hook;
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ return __register_btf_kfunc_id_set(hook, kset);
+}
EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset)
+{
+ return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset);
+}
+EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set);
+
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
{
struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a5a511430f2a..af30c6cbd65d 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1404,7 +1404,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
#define DYNPTR_SIZE_MASK 0xFFFFFF
#define DYNPTR_RDONLY_BIT BIT(31)
-static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
+static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_RDONLY_BIT;
}
@@ -1414,7 +1414,7 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ
ptr->size |= type << DYNPTR_TYPE_SHIFT;
}
-u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
+u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
}
@@ -1438,7 +1438,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
memset(ptr, 0, sizeof(*ptr));
}
-static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
{
u32 size = bpf_dynptr_get_size(ptr);
@@ -1483,7 +1483,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
};
-BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
+BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
int err;
@@ -1495,7 +1495,11 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
if (err)
return err;
- memcpy(dst, src->data + src->offset + offset, len);
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst, src->data + src->offset + offset, len);
return 0;
}
@@ -1506,12 +1510,12 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
- .arg3_type = ARG_PTR_TO_DYNPTR,
+ .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
-BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
+BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
int err;
@@ -1523,7 +1527,11 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
if (err)
return err;
- memcpy(dst->data + dst->offset + offset, src, len);
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst->data + dst->offset + offset, src, len);
return 0;
}
@@ -1532,14 +1540,14 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
.func = bpf_dynptr_write,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_DYNPTR,
+ .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
.arg5_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
+BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
int err;
@@ -1560,7 +1568,7 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
.func = bpf_dynptr_data,
.gpl_only = false,
.ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
- .arg1_type = ARG_PTR_TO_DYNPTR,
+ .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
};
@@ -1833,8 +1841,59 @@ struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
*/
struct task_struct *bpf_task_acquire(struct task_struct *p)
{
- refcount_inc(&p->rcu_users);
- return p;
+ return get_task_struct(p);
+}
+
+/**
+ * bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task
+ * acquired by this kfunc which is not stored in a map as a kptr, must be
+ * released by calling bpf_task_release().
+ * @p: The task on which a reference is being acquired.
+ */
+struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
+{
+ /* For the time being this function returns NULL, as it's not currently
+ * possible to safely acquire a reference to a task with RCU protection
+ * using get_task_struct() and put_task_struct(). This is due to the
+ * slightly odd mechanics of p->rcu_users, and how task RCU protection
+ * works.
+ *
+ * A struct task_struct is refcounted by two different refcount_t
+ * fields:
+ *
+ * 1. p->usage: The "true" refcount field which tracks a task's
+ * lifetime. The task is freed as soon as this
+ * refcount drops to 0.
+ *
+ * 2. p->rcu_users: An "RCU users" refcount field which is statically
+ * initialized to 2, and is co-located in a union with
+ * a struct rcu_head field (p->rcu). p->rcu_users
+ * essentially encapsulates a single p->usage
+ * refcount, and when p->rcu_users goes to 0, an RCU
+ * callback is scheduled on the struct rcu_head which
+ * decrements the p->usage refcount.
+ *
+ * There are two important implications to this task refcounting logic
+ * described above. The first is that
+ * refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as
+ * after the refcount goes to 0, the RCU callback being scheduled will
+ * cause the memory backing the refcount to again be nonzero due to the
+ * fields sharing a union. The other is that we can't rely on RCU to
+ * guarantee that a task is valid in a BPF program. This is because a
+ * task could have already transitioned to being in the TASK_DEAD
+ * state, had its rcu_users refcount go to 0, and its rcu callback
+ * invoked in which it drops its single p->usage reference. At this
+ * point the task will be freed as soon as the last p->usage reference
+ * goes to 0, without waiting for another RCU gp to elapse. The only
+ * way that a BPF program can guarantee that a task is valid is in this
+ * scenario is to hold a p->usage refcount itself.
+ *
+ * Until we're able to resolve this issue, either by pulling
+ * p->rcu_users and p->rcu out of the union, or by getting rid of
+ * p->usage and just using p->rcu_users for refcounting, we'll just
+ * return NULL here.
+ */
+ return NULL;
}
/**
@@ -1845,33 +1904,15 @@ struct task_struct *bpf_task_acquire(struct task_struct *p)
*/
struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
{
- struct task_struct *p;
-
- rcu_read_lock();
- p = READ_ONCE(*pp);
-
- /* Another context could remove the task from the map and release it at
- * any time, including after we've done the lookup above. This is safe
- * because we're in an RCU read region, so the task is guaranteed to
- * remain valid until at least the rcu_read_unlock() below.
+ /* We must return NULL here until we have clarity on how to properly
+ * leverage RCU for ensuring a task's lifetime. See the comment above
+ * in bpf_task_acquire_not_zero() for more details.
*/
- if (p && !refcount_inc_not_zero(&p->rcu_users))
- /* If the task had been removed from the map and freed as
- * described above, refcount_inc_not_zero() will return false.
- * The task will be freed at some point after the current RCU
- * gp has ended, so just return NULL to the user.
- */
- p = NULL;
- rcu_read_unlock();
-
- return p;
+ return NULL;
}
/**
- * bpf_task_release - Release the reference acquired on a struct task_struct *.
- * If this kfunc is invoked in an RCU read region, the task_struct is
- * guaranteed to not be freed until the current grace period has ended, even if
- * its refcount drops to 0.
+ * bpf_task_release - Release the reference acquired on a task.
* @p: The task on which a reference is being released.
*/
void bpf_task_release(struct task_struct *p)
@@ -1879,7 +1920,7 @@ void bpf_task_release(struct task_struct *p)
if (!p)
return;
- put_task_struct_rcu_user(p);
+ put_task_struct(p);
}
#ifdef CONFIG_CGROUPS
@@ -1927,7 +1968,7 @@ struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
}
/**
- * bpf_cgroup_release - Release the reference acquired on a struct cgroup *.
+ * bpf_cgroup_release - Release the reference acquired on a cgroup.
* If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
* not be freed until the current grace period has ended, even if its refcount
* drops to 0.
@@ -2013,6 +2054,7 @@ BTF_ID_FLAGS(func, bpf_list_push_back)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
#ifdef CONFIG_CGROUPS
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 8f0d65f2474a..ebcc3dd0fa19 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -171,9 +171,24 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
memcg = get_memcg(c);
old_memcg = set_active_memcg(memcg);
for (i = 0; i < cnt; i++) {
- obj = __alloc(c, node);
- if (!obj)
- break;
+ /*
+ * free_by_rcu is only manipulated by irq work refill_work().
+ * IRQ works on the same CPU are called sequentially, so it is
+ * safe to use __llist_del_first() here. If alloc_bulk() is
+ * invoked by the initial prefill, there will be no running
+ * refill_work(), so __llist_del_first() is fine as well.
+ *
+ * In most cases, objects on free_by_rcu are from the same CPU.
+ * If some objects come from other CPUs, it doesn't incur any
+ * harm because NUMA_NO_NODE means the preference for current
+ * numa node and it is not a guarantee.
+ */
+ obj = __llist_del_first(&c->free_by_rcu);
+ if (!obj) {
+ obj = __alloc(c, node);
+ if (!obj)
+ break;
+ }
if (IS_ENABLED(CONFIG_PREEMPT_RT))
/* In RT irq_work runs in per-cpu kthread, so disable
* interrupts to avoid preemption and interrupts and
@@ -449,9 +464,17 @@ static void free_mem_alloc(struct bpf_mem_alloc *ma)
{
/* waiting_for_gp lists was drained, but __free_rcu might
* still execute. Wait for it now before we freeing percpu caches.
+ *
+ * rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
+ * but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
+ * to wait for the pending __free_rcu_tasks_trace() and __free_rcu(),
+ * so if call_rcu(head, __free_rcu) is skipped due to
+ * rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
+ * using rcu_trace_implies_rcu_gp() as well.
*/
rcu_barrier_tasks_trace();
- rcu_barrier();
+ if (!rcu_trace_implies_rcu_gp())
+ rcu_barrier();
free_mem_alloc_no_barrier(ma);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4e7f1d085e53..a5255a0dcbb6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -451,6 +451,11 @@ static bool reg_type_not_null(enum bpf_reg_type type)
type == PTR_TO_SOCK_COMMON;
}
+static bool type_is_ptr_alloc_obj(u32 type)
+{
+ return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
+}
+
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
struct btf_record *rec = NULL;
@@ -458,7 +463,7 @@ static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
if (reg->type == PTR_TO_MAP_VALUE) {
rec = reg->map_ptr->record;
- } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ } else if (type_is_ptr_alloc_obj(reg->type)) {
meta = btf_find_struct_meta(reg->btf, reg->btf_id);
if (meta)
rec = meta->record;
@@ -587,7 +592,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
[PTR_TO_BUF] = "buf",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
- [PTR_TO_DYNPTR] = "dynptr_ptr",
+ [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
};
if (type & PTR_MAYBE_NULL) {
@@ -720,6 +725,28 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
return type == BPF_DYNPTR_TYPE_RINGBUF;
}
+static void __mark_dynptr_reg(struct bpf_reg_state *reg,
+ enum bpf_dynptr_type type,
+ bool first_slot);
+
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
+
+static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
+ struct bpf_reg_state *sreg2,
+ enum bpf_dynptr_type type)
+{
+ __mark_dynptr_reg(sreg1, type, true);
+ __mark_dynptr_reg(sreg2, type, false);
+}
+
+static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
+ enum bpf_dynptr_type type)
+{
+ __mark_dynptr_reg(reg, type, true);
+}
+
+
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
enum bpf_arg_type arg_type, int insn_idx)
{
@@ -741,9 +768,8 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
if (type == BPF_DYNPTR_TYPE_INVALID)
return -EINVAL;
- state->stack[spi].spilled_ptr.dynptr.first_slot = true;
- state->stack[spi].spilled_ptr.dynptr.type = type;
- state->stack[spi - 1].spilled_ptr.dynptr.type = type;
+ mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
+ &state->stack[spi - 1].spilled_ptr, type);
if (dynptr_type_refcounted(type)) {
/* The id is used to track proper releasing */
@@ -751,8 +777,8 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
if (id < 0)
return id;
- state->stack[spi].spilled_ptr.id = id;
- state->stack[spi - 1].spilled_ptr.id = id;
+ state->stack[spi].spilled_ptr.ref_obj_id = id;
+ state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
}
return 0;
@@ -774,25 +800,23 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
}
/* Invalidate any slices associated with this dynptr */
- if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
- release_reference(env, state->stack[spi].spilled_ptr.id);
- state->stack[spi].spilled_ptr.id = 0;
- state->stack[spi - 1].spilled_ptr.id = 0;
- }
-
- state->stack[spi].spilled_ptr.dynptr.first_slot = false;
- state->stack[spi].spilled_ptr.dynptr.type = 0;
- state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
+ if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
+ WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
+ __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+ __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
return 0;
}
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
- int i;
+ int spi, i;
+
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return false;
+ spi = get_spi(reg->off);
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
return true;
@@ -805,13 +829,17 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_
return true;
}
-bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg)
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
+ int spi;
int i;
+ /* This already represents first slot of initialized bpf_dynptr */
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return true;
+
+ spi = get_spi(reg->off);
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
!state->stack[spi].spilled_ptr.dynptr.first_slot)
return false;
@@ -825,21 +853,24 @@ bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
return true;
}
-bool is_dynptr_type_expected(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- enum bpf_arg_type arg_type)
+static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ enum bpf_arg_type arg_type)
{
struct bpf_func_state *state = func(env, reg);
enum bpf_dynptr_type dynptr_type;
- int spi = get_spi(reg->off);
+ int spi;
/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
if (arg_type == ARG_PTR_TO_DYNPTR)
return true;
dynptr_type = arg_to_dynptr_type(arg_type);
-
- return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
+ if (reg->type == CONST_PTR_TO_DYNPTR) {
+ return reg->dynptr.type == dynptr_type;
+ } else {
+ spi = get_spi(reg->off);
+ return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
+ }
}
/* The reg state of a pointer or a bounded scalar was saved when
@@ -1351,9 +1382,6 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void __mark_reg_not_init(const struct bpf_verifier_env *env,
- struct bpf_reg_state *reg);
-
/* This helper doesn't clear reg->id */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
@@ -1416,6 +1444,19 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
__mark_reg_known_zero(regs + regno);
}
+static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
+ bool first_slot)
+{
+ /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
+ * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
+ * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
+ */
+ __mark_reg_known_zero(reg);
+ reg->type = CONST_PTR_TO_DYNPTR;
+ reg->dynptr.type = type;
+ reg->dynptr.first_slot = first_slot;
+}
+
static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
{
if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
@@ -2525,6 +2566,16 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return 0;
}
+static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].jmp_point = true;
+}
+
+static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].jmp_point;
+}
+
/* for any branch, call, exit record the history of jmps in the given state */
static int push_jmp_history(struct bpf_verifier_env *env,
struct bpf_verifier_state *cur)
@@ -2533,6 +2584,9 @@ static int push_jmp_history(struct bpf_verifier_env *env,
struct bpf_idx_pair *p;
size_t alloc_size;
+ if (!is_jmp_point(env, env->insn_idx))
+ return 0;
+
cnt++;
alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
@@ -4275,7 +4329,7 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
return true;
/* If a register is not referenced, it is trusted if it has the
- * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
+ * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
* other type modifiers may be safe, but we elect to take an opt-in
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
* not.
@@ -4287,6 +4341,11 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
!bpf_type_has_unsafe_modifiers(reg->type);
}
+static bool is_rcu_reg(const struct bpf_reg_state *reg)
+{
+ return reg->type & MEM_RCU;
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -4703,6 +4762,18 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
u32 btf_id;
int ret;
+ if (!env->allow_ptr_leaks) {
+ verbose(env,
+ "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
+ tname);
+ return -EPERM;
+ }
+ if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
+ verbose(env,
+ "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
+ tname);
+ return -EINVAL;
+ }
if (off < 0) {
verbose(env,
"R%d is ptr_%s invalid negative access: off=%d\n",
@@ -4773,14 +4844,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (flag & MEM_RCU) {
/* Mark value register as MEM_RCU only if it is protected by
- * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
+ * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
* itself can already indicate trustedness inside the rcu
- * read lock region. Also mark it as PTR_TRUSTED.
+ * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
+ * it could be null in some cases.
*/
- if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
+ if (!env->cur_state->active_rcu_lock ||
+ !(is_trusted_reg(reg) || is_rcu_reg(reg)))
flag &= ~MEM_RCU;
else
- flag |= PTR_TRUSTED;
+ flag |= PTR_MAYBE_NULL;
} else if (reg->type & MEM_RCU) {
/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
* with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
@@ -4823,9 +4896,9 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
- if (!env->allow_ptr_to_map_access) {
+ if (!env->allow_ptr_leaks) {
verbose(env,
- "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
+ "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
tname);
return -EPERM;
}
@@ -5726,7 +5799,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
cur->active_lock.ptr = NULL;
cur->active_lock.id = 0;
- for (i = 0; i < fstate->acquired_refs; i++) {
+ for (i = fstate->acquired_refs - 1; i >= 0; i--) {
int err;
/* Complain on error because this reference state cannot
@@ -5822,6 +5895,119 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
return 0;
}
+/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
+ * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
+ *
+ * In both cases we deal with the first 8 bytes, but need to mark the next 8
+ * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
+ * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
+ *
+ * Mutability of bpf_dynptr is at two levels, one is at the level of struct
+ * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
+ * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
+ * mutate the view of the dynptr and also possibly destroy it. In the latter
+ * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
+ * memory that dynptr points to.
+ *
+ * The verifier will keep track both levels of mutation (bpf_dynptr's in
+ * reg->type and the memory's in reg->dynptr.type), but there is no support for
+ * readonly dynptr view yet, hence only the first case is tracked and checked.
+ *
+ * This is consistent with how C applies the const modifier to a struct object,
+ * where the pointer itself inside bpf_dynptr becomes const but not what it
+ * points to.
+ *
+ * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
+ * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
+ */
+int process_dynptr_func(struct bpf_verifier_env *env, int regno,
+ enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+
+ /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
+ * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
+ */
+ if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
+ verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
+ return -EFAULT;
+ }
+ /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
+ * check_func_arg_reg_off's logic. We only need to check offset
+ * alignment for PTR_TO_STACK.
+ */
+ if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
+ verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
+ return -EINVAL;
+ }
+ /* MEM_UNINIT - Points to memory that is an appropriate candidate for
+ * constructing a mutable bpf_dynptr object.
+ *
+ * Currently, this is only possible with PTR_TO_STACK
+ * pointing to a region of at least 16 bytes which doesn't
+ * contain an existing bpf_dynptr.
+ *
+ * MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
+ * mutated or destroyed. However, the memory it points to
+ * may be mutated.
+ *
+ * None - Points to a initialized dynptr that can be mutated and
+ * destroyed, including mutation of the memory it points
+ * to.
+ */
+ if (arg_type & MEM_UNINIT) {
+ if (!is_dynptr_reg_valid_uninit(env, reg)) {
+ verbose(env, "Dynptr has to be an uninitialized dynptr\n");
+ return -EINVAL;
+ }
+
+ /* We only support one dynptr being uninitialized at the moment,
+ * which is sufficient for the helper functions we have right now.
+ */
+ if (meta->uninit_dynptr_regno) {
+ verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
+ return -EFAULT;
+ }
+
+ meta->uninit_dynptr_regno = regno;
+ } else /* MEM_RDONLY and None case from above */ {
+ /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
+ if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
+ verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
+ return -EINVAL;
+ }
+
+ if (!is_dynptr_reg_valid_init(env, reg)) {
+ verbose(env,
+ "Expected an initialized dynptr as arg #%d\n",
+ regno);
+ return -EINVAL;
+ }
+
+ /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
+ if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
+ const char *err_extra = "";
+
+ switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
+ case DYNPTR_TYPE_LOCAL:
+ err_extra = "local";
+ break;
+ case DYNPTR_TYPE_RINGBUF:
+ err_extra = "ringbuf";
+ break;
+ default:
+ err_extra = "<unknown>";
+ break;
+ }
+ verbose(env,
+ "Expected a dynptr of type %s as arg #%d\n",
+ err_extra, regno);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
static bool arg_type_is_mem_size(enum bpf_arg_type type)
{
return type == ARG_CONST_SIZE ||
@@ -5945,7 +6131,7 @@ static const struct bpf_reg_types btf_ptr_types = {
.types = {
PTR_TO_BTF_ID,
PTR_TO_BTF_ID | PTR_TRUSTED,
- PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
+ PTR_TO_BTF_ID | MEM_RCU,
},
};
static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -5962,7 +6148,7 @@ static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }
static const struct bpf_reg_types dynptr_types = {
.types = {
PTR_TO_STACK,
- PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL,
+ CONST_PTR_TO_DYNPTR,
}
};
@@ -6091,17 +6277,38 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type)
{
- enum bpf_reg_type type = reg->type;
- bool fixed_off_ok = false;
+ u32 type = reg->type;
- switch ((u32)type) {
- /* Pointer types where reg offset is explicitly allowed: */
- case PTR_TO_STACK:
- if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
- verbose(env, "cannot pass in dynptr at an offset\n");
+ /* When referenced register is passed to release function, its fixed
+ * offset must be 0.
+ *
+ * We will check arg_type_is_release reg has ref_obj_id when storing
+ * meta->release_regno.
+ */
+ if (arg_type_is_release(arg_type)) {
+ /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
+ * may not directly point to the object being released, but to
+ * dynptr pointing to such object, which might be at some offset
+ * on the stack. In that case, we simply to fallback to the
+ * default handling.
+ */
+ if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
+ return 0;
+ /* Doing check_ptr_off_reg check for the offset will catch this
+ * because fixed_off_ok is false, but checking here allows us
+ * to give the user a better error message.
+ */
+ if (reg->off) {
+ verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
+ regno);
return -EINVAL;
}
- fallthrough;
+ return __check_ptr_off_reg(env, reg, regno, false);
+ }
+
+ switch (type) {
+ /* Pointer types where both fixed and variable offset is explicitly allowed: */
+ case PTR_TO_STACK:
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
case PTR_TO_MAP_KEY:
@@ -6112,47 +6319,38 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY:
case SCALAR_VALUE:
- /* Some of the argument types nevertheless require a
- * zero register offset.
- */
- if (base_type(arg_type) != ARG_PTR_TO_RINGBUF_MEM)
- return 0;
- break;
+ return 0;
/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
* fixed offset.
*/
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | PTR_TRUSTED:
- case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_RCU:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
/* When referenced PTR_TO_BTF_ID is passed to release function,
- * it's fixed offset must be 0. In the other cases, fixed offset
- * can be non-zero.
+ * its fixed offset must be 0. In the other cases, fixed offset
+ * can be non-zero. This was already checked above. So pass
+ * fixed_off_ok as true to allow fixed offset for all other
+ * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
+ * still need to do checks instead of returning.
*/
- if (arg_type_is_release(arg_type) && reg->off) {
- verbose(env, "R%d must have zero offset when passed to release func\n",
- regno);
- return -EINVAL;
- }
- /* For arg is release pointer, fixed_off_ok must be false, but
- * we already checked and rejected reg->off != 0 above, so set
- * to true to allow fixed offset for all other cases.
- */
- fixed_off_ok = true;
- break;
+ return __check_ptr_off_reg(env, reg, regno, true);
default:
- break;
+ return __check_ptr_off_reg(env, reg, regno, false);
}
- return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
}
-static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
+ int spi;
- return state->stack[spi].spilled_ptr.id;
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return reg->ref_obj_id;
+
+ spi = get_spi(reg->off);
+ return state->stack[spi].spilled_ptr.ref_obj_id;
}
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
@@ -6217,11 +6415,22 @@ skip_type_check:
if (arg_type_is_release(arg_type)) {
if (arg_type_is_dynptr(arg_type)) {
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
+ int spi;
- if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
- !state->stack[spi].spilled_ptr.id) {
- verbose(env, "arg %d is an unacquired reference\n", regno);
+ /* Only dynptr created on stack can be released, thus
+ * the get_spi and stack state checks for spilled_ptr
+ * should only be done before process_dynptr_func for
+ * PTR_TO_STACK.
+ */
+ if (reg->type == PTR_TO_STACK) {
+ spi = get_spi(reg->off);
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
+ !state->stack[spi].spilled_ptr.ref_obj_id) {
+ verbose(env, "arg %d is an unacquired reference\n", regno);
+ return -EINVAL;
+ }
+ } else {
+ verbose(env, "cannot release unowned const bpf_dynptr\n");
return -EINVAL;
}
} else if (!reg->ref_obj_id && !register_is_null(reg)) {
@@ -6318,19 +6527,22 @@ skip_type_check:
break;
case ARG_PTR_TO_SPIN_LOCK:
if (meta->func_id == BPF_FUNC_spin_lock) {
- if (process_spin_lock(env, regno, true))
- return -EACCES;
+ err = process_spin_lock(env, regno, true);
+ if (err)
+ return err;
} else if (meta->func_id == BPF_FUNC_spin_unlock) {
- if (process_spin_lock(env, regno, false))
- return -EACCES;
+ err = process_spin_lock(env, regno, false);
+ if (err)
+ return err;
} else {
verbose(env, "verifier internal error\n");
return -EFAULT;
}
break;
case ARG_PTR_TO_TIMER:
- if (process_timer_func(env, regno, meta))
- return -EACCES;
+ err = process_timer_func(env, regno, meta);
+ if (err)
+ return err;
break;
case ARG_PTR_TO_FUNC:
meta->subprogno = reg->subprogno;
@@ -6353,52 +6565,9 @@ skip_type_check:
err = check_mem_size_reg(env, reg, regno, true, meta);
break;
case ARG_PTR_TO_DYNPTR:
- /* We only need to check for initialized / uninitialized helper
- * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the
- * assumption is that if it is, that a helper function
- * initialized the dynptr on behalf of the BPF program.
- */
- if (base_type(reg->type) == PTR_TO_DYNPTR)
- break;
- if (arg_type & MEM_UNINIT) {
- if (!is_dynptr_reg_valid_uninit(env, reg)) {
- verbose(env, "Dynptr has to be an uninitialized dynptr\n");
- return -EINVAL;
- }
-
- /* We only support one dynptr being uninitialized at the moment,
- * which is sufficient for the helper functions we have right now.
- */
- if (meta->uninit_dynptr_regno) {
- verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
- return -EFAULT;
- }
-
- meta->uninit_dynptr_regno = regno;
- } else if (!is_dynptr_reg_valid_init(env, reg)) {
- verbose(env,
- "Expected an initialized dynptr as arg #%d\n",
- arg + 1);
- return -EINVAL;
- } else if (!is_dynptr_type_expected(env, reg, arg_type)) {
- const char *err_extra = "";
-
- switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
- case DYNPTR_TYPE_LOCAL:
- err_extra = "local";
- break;
- case DYNPTR_TYPE_RINGBUF:
- err_extra = "ringbuf";
- break;
- default:
- err_extra = "<unknown>";
- break;
- }
- verbose(env,
- "Expected a dynptr of type %s as arg #%d\n",
- err_extra, arg + 1);
- return -EINVAL;
- }
+ err = process_dynptr_func(env, regno, arg_type, meta);
+ if (err)
+ return err;
break;
case ARG_CONST_ALLOC_SIZE_OR_ZERO:
if (!tnum_is_const(reg->var_off)) {
@@ -6465,8 +6634,9 @@ skip_type_check:
break;
}
case ARG_PTR_TO_KPTR:
- if (process_kptr_func(env, regno, meta))
- return -EACCES;
+ err = process_kptr_func(env, regno, meta);
+ if (err)
+ return err;
break;
}
@@ -7234,11 +7404,10 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
{
/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
* callback_ctx, u64 flags);
- * callback_fn(struct bpf_dynptr_t* dynptr, void *callback_ctx);
+ * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
*/
__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
- callee->regs[BPF_REG_1].type = PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL;
- __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+ mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
/* unused */
@@ -7632,7 +7801,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs = cur_regs(env);
+ /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
+ * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr
+ * is safe to do directly.
+ */
if (meta.uninit_dynptr_regno) {
+ if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) {
+ verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n");
+ return -EFAULT;
+ }
/* we write BPF_DW bits (8 bytes) at a time */
for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
@@ -7650,15 +7827,24 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (meta.release_regno) {
err = -EINVAL;
- if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
+ /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
+ * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
+ * is safe to do directly.
+ */
+ if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
+ if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
+ verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
+ return -EFAULT;
+ }
err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
- else if (meta.ref_obj_id)
+ } else if (meta.ref_obj_id) {
err = release_reference(env, meta.ref_obj_id);
- /* meta.ref_obj_id can only be 0 if register that is meant to be
- * released is NULL, which must be > R0.
- */
- else if (register_is_null(&regs[meta.release_regno]))
+ } else if (register_is_null(&regs[meta.release_regno])) {
+ /* meta.ref_obj_id can only be 0 if register that is meant to be
+ * released is NULL, which must be > R0.
+ */
err = 0;
+ }
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
@@ -7732,11 +7918,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EFAULT;
}
- if (base_type(reg->type) != PTR_TO_DYNPTR)
- /* Find the id of the dynptr we're
- * tracking the reference of
- */
- meta.ref_obj_id = stack_slot_get_id(env, reg);
+ meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
break;
}
}
@@ -8026,6 +8208,11 @@ static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_DESTRUCTIVE;
}
+static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_RCU;
+}
+
static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
{
return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
@@ -8710,13 +8897,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
switch (kf_arg_type) {
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
case KF_ARG_PTR_TO_BTF_ID:
- if (!is_kfunc_trusted_args(meta))
+ if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
break;
if (!is_trusted_reg(reg)) {
- verbose(env, "R%d must be referenced or trusted\n", regno);
- return -EINVAL;
+ if (!is_kfunc_rcu(meta)) {
+ verbose(env, "R%d must be referenced or trusted\n", regno);
+ return -EINVAL;
+ }
+ if (!is_rcu_reg(reg)) {
+ verbose(env, "R%d must be a rcu pointer\n", regno);
+ return -EINVAL;
+ }
}
+
fallthrough;
case KF_ARG_PTR_TO_CTX:
/* Trusted arguments have the same offset checks as release arguments */
@@ -8780,22 +8974,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_DYNPTR:
- if (reg->type != PTR_TO_STACK) {
- verbose(env, "arg#%d expected pointer to stack\n", i);
+ if (reg->type != PTR_TO_STACK &&
+ reg->type != CONST_PTR_TO_DYNPTR) {
+ verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
return -EINVAL;
}
- if (!is_dynptr_reg_valid_init(env, reg)) {
- verbose(env, "arg#%d pointer type %s %s must be valid and initialized\n",
- i, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
-
- if (!is_dynptr_type_expected(env, reg, ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) {
- verbose(env, "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n",
- i, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
+ ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL);
+ if (ret < 0)
+ return ret;
break;
case KF_ARG_PTR_TO_LIST_HEAD:
if (reg->type != PTR_TO_MAP_VALUE &&
@@ -8827,7 +9014,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_BTF_ID:
/* Only base_type is checked, further checks are done here */
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
- bpf_type_has_unsafe_modifiers(reg->type)) &&
+ (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
!reg2btf_ids[base_type(reg->type)]) {
verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
verbose(env, "expected %s or socket\n",
@@ -8942,7 +9129,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
} else if (rcu_unlock) {
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
if (reg->type & MEM_RCU) {
- reg->type &= ~(MEM_RCU | PTR_TRUSTED);
+ reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
reg->type |= PTR_UNTRUSTED;
}
}));
@@ -11282,7 +11469,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
bool is_null)
{
if (type_may_be_null(reg->type) && reg->id == id &&
- !WARN_ON_ONCE(!reg->id)) {
+ (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
/* Old offset (both fixed and variable parts) should have been
* known-zero, because we don't allow pointer arithmetic on
* pointers that might be NULL. If we see this happening, don't
@@ -12104,11 +12291,16 @@ static struct bpf_verifier_state_list **explored_state(
return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
}
-static void init_explored_state(struct bpf_verifier_env *env, int idx)
+static void mark_prune_point(struct bpf_verifier_env *env, int idx)
{
env->insn_aux_data[idx].prune_point = true;
}
+static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].prune_point;
+}
+
enum {
DONE_EXPLORING = 0,
KEEP_EXPLORING = 1,
@@ -12137,9 +12329,11 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
return -EINVAL;
}
- if (e == BRANCH)
+ if (e == BRANCH) {
/* mark branch target for state pruning */
- init_explored_state(env, w);
+ mark_prune_point(env, w);
+ mark_jmp_point(env, w);
+ }
if (insn_state[w] == 0) {
/* tree-edge */
@@ -12166,8 +12360,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
return DONE_EXPLORING;
}
-static int visit_func_call_insn(int t, int insn_cnt,
- struct bpf_insn *insns,
+static int visit_func_call_insn(int t, struct bpf_insn *insns,
struct bpf_verifier_env *env,
bool visit_callee)
{
@@ -12177,10 +12370,12 @@ static int visit_func_call_insn(int t, int insn_cnt,
if (ret)
return ret;
- if (t + 1 < insn_cnt)
- init_explored_state(env, t + 1);
+ mark_prune_point(env, t + 1);
+ /* when we exit from subprog, we need to record non-linear history */
+ mark_jmp_point(env, t + 1);
+
if (visit_callee) {
- init_explored_state(env, t);
+ mark_prune_point(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
/* It's ok to allow recursion from CFG point of
* view. __check_func_call() will do the actual
@@ -12196,13 +12391,13 @@ static int visit_func_call_insn(int t, int insn_cnt,
* DONE_EXPLORING - the instruction was fully explored
* KEEP_EXPLORING - there is still work to be done before it is fully explored
*/
-static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+static int visit_insn(int t, struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi;
int ret;
if (bpf_pseudo_func(insns + t))
- return visit_func_call_insn(t, insn_cnt, insns, env, true);
+ return visit_func_call_insn(t, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
if (BPF_CLASS(insns[t].code) != BPF_JMP &&
@@ -12215,13 +12410,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
case BPF_CALL:
if (insns[t].imm == BPF_FUNC_timer_set_callback)
- /* Mark this call insn to trigger is_state_visited() check
- * before call itself is processed by __check_func_call().
- * Otherwise new async state will be pushed for further
- * exploration.
+ /* Mark this call insn as a prune point to trigger
+ * is_state_visited() check before call itself is
+ * processed by __check_func_call(). Otherwise new
+ * async state will be pushed for further exploration.
*/
- init_explored_state(env, t);
- return visit_func_call_insn(t, insn_cnt, insns, env,
+ mark_prune_point(env, t);
+ return visit_func_call_insn(t, insns, env,
insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
@@ -12234,22 +12429,15 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
if (ret)
return ret;
- /* unconditional jmp is not a good pruning point,
- * but it's marked, since backtracking needs
- * to record jmp history in is_state_visited().
- */
- init_explored_state(env, t + insns[t].off + 1);
- /* tell verifier to check for equivalent states
- * after every call and jump
- */
- if (t + 1 < insn_cnt)
- init_explored_state(env, t + 1);
+ mark_prune_point(env, t + insns[t].off + 1);
+ mark_jmp_point(env, t + insns[t].off + 1);
return ret;
default:
/* conditional jump with two edges */
- init_explored_state(env, t);
+ mark_prune_point(env, t);
+
ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
if (ret)
return ret;
@@ -12285,7 +12473,7 @@ static int check_cfg(struct bpf_verifier_env *env)
while (env->cfg.cur_stack > 0) {
int t = insn_stack[env->cfg.cur_stack - 1];
- ret = visit_insn(t, insn_cnt, env);
+ ret = visit_insn(t, env);
switch (ret) {
case DONE_EXPLORING:
insn_state[t] = EXPLORED;
@@ -12876,15 +13064,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
- if (rold->type == PTR_TO_STACK)
- /* two stack pointers are equal only if they're pointing to
- * the same stack frame, since fp-8 in foo != fp-8 in bar
- */
- return equal && rold->frameno == rcur->frameno;
-
- if (equal)
- return true;
-
if (rold->type == NOT_INIT)
/* explored state can't have used this */
return true;
@@ -12892,6 +13071,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
return false;
switch (base_type(rold->type)) {
case SCALAR_VALUE:
+ if (equal)
+ return true;
if (env->explore_alu_limits)
return false;
if (rcur->type == SCALAR_VALUE) {
@@ -12938,7 +13119,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
*/
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off);
+ tnum_in(rold->var_off, rcur->var_off) &&
+ check_ids(rold->id, rcur->id, idmap);
case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
if (rcur->type != rold->type)
@@ -12962,20 +13144,14 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
/* new val must satisfy old val knowledge */
return range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off);
- case PTR_TO_CTX:
- case CONST_PTR_TO_MAP:
- case PTR_TO_PACKET_END:
- case PTR_TO_FLOW_KEYS:
- case PTR_TO_SOCKET:
- case PTR_TO_SOCK_COMMON:
- case PTR_TO_TCP_SOCK:
- case PTR_TO_XDP_SOCK:
- /* Only valid matches are exact, which memcmp() above
- * would have accepted
+ case PTR_TO_STACK:
+ /* two stack pointers are equal only if they're pointing to
+ * the same stack frame, since fp-8 in foo != fp-8 in bar
*/
+ return equal && rold->frameno == rcur->frameno;
default:
- /* Don't know what's going on, just say it's not safe */
- return false;
+ /* Only valid matches are exact, which memcmp() */
+ return equal;
}
/* Shouldn't get here; if we do, say it's not safe */
@@ -13085,7 +13261,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
{
int i;
- memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
for (i = 0; i < MAX_BPF_REG; i++)
if (!regsafe(env, &old->regs[i], &cur->regs[i],
env->idmap_scratch))
@@ -13109,14 +13284,25 @@ static bool states_equal(struct bpf_verifier_env *env,
if (old->curframe != cur->curframe)
return false;
+ memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+
/* Verification state from speculative execution simulation
* must never prune a non-speculative execution one.
*/
if (old->speculative && !cur->speculative)
return false;
- if (old->active_lock.ptr != cur->active_lock.ptr ||
- old->active_lock.id != cur->active_lock.id)
+ if (old->active_lock.ptr != cur->active_lock.ptr)
+ return false;
+
+ /* Old and cur active_lock's have to be either both present
+ * or both absent.
+ */
+ if (!!old->active_lock.id != !!cur->active_lock.id)
+ return false;
+
+ if (old->active_lock.id &&
+ !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
return false;
if (old->active_rcu_lock != cur->active_rcu_lock)
@@ -13283,13 +13469,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
int i, j, err, states_cnt = 0;
bool add_new_state = env->test_state_freq ? true : false;
- cur->last_insn_idx = env->prev_insn_idx;
- if (!env->insn_aux_data[insn_idx].prune_point)
- /* this 'insn_idx' instruction wasn't marked, so we will not
- * be doing state search here
- */
- return 0;
-
/* bpf progs typically have pruning point every 4 instructions
* http://vger.kernel.org/bpfconf2019.html#session-1
* Do not add new state for future pruning if the verifier hasn't seen
@@ -13424,10 +13603,10 @@ next:
env->max_states_per_insn = states_cnt;
if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
- return push_jmp_history(env, cur);
+ return 0;
if (!add_new_state)
- return push_jmp_history(env, cur);
+ return 0;
/* There were no equivalent states, remember the current one.
* Technically the current state is not proven to be safe yet,
@@ -13567,21 +13746,31 @@ static int do_check(struct bpf_verifier_env *env)
return -E2BIG;
}
- err = is_state_visited(env, env->insn_idx);
- if (err < 0)
- return err;
- if (err == 1) {
- /* found equivalent state, can prune the search */
- if (env->log.level & BPF_LOG_LEVEL) {
- if (do_print_state)
- verbose(env, "\nfrom %d to %d%s: safe\n",
- env->prev_insn_idx, env->insn_idx,
- env->cur_state->speculative ?
- " (speculative execution)" : "");
- else
- verbose(env, "%d: safe\n", env->insn_idx);
+ state->last_insn_idx = env->prev_insn_idx;
+
+ if (is_prune_point(env, env->insn_idx)) {
+ err = is_state_visited(env, env->insn_idx);
+ if (err < 0)
+ return err;
+ if (err == 1) {
+ /* found equivalent state, can prune the search */
+ if (env->log.level & BPF_LOG_LEVEL) {
+ if (do_print_state)
+ verbose(env, "\nfrom %d to %d%s: safe\n",
+ env->prev_insn_idx, env->insn_idx,
+ env->cur_state->speculative ?
+ " (speculative execution)" : "");
+ else
+ verbose(env, "%d: safe\n", env->insn_idx);
+ }
+ goto process_bpf_exit;
}
- goto process_bpf_exit;
+ }
+
+ if (is_jmp_point(env, env->insn_idx)) {
+ err = push_jmp_history(env, state);
+ if (err)
+ return err;
}
if (signal_pending(current))
@@ -14123,10 +14312,11 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_TASK_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
break;
default:
verbose(env,
- "Sleepable programs can only use array, hash, and ringbuf maps\n");
+ "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
return -EINVAL;
}
@@ -14782,6 +14972,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
continue;
+ /* Zero-extension is done by the caller. */
+ if (bpf_pseudo_kfunc_call(&insn))
+ continue;
+
if (WARN_ON(load_reg == -1)) {
verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
return -EFAULT;
@@ -15292,7 +15486,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
/* insn->imm has the btf func_id. Replace it with
- * an address (relative to __bpf_base_call).
+ * an address (relative to __bpf_call_base).
*/
desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
if (!desc) {
@@ -16464,12 +16658,22 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
ret = -EINVAL;
switch (prog->type) {
case BPF_PROG_TYPE_TRACING:
- /* fentry/fexit/fmod_ret progs can be sleepable only if they are
+
+ /* fentry/fexit/fmod_ret progs can be sleepable if they are
* attached to ALLOW_ERROR_INJECTION and are not in denylist.
*/
if (!check_non_sleepable_error_inject(btf_id) &&
within_error_injection_list(addr))
ret = 0;
+ /* fentry/fexit/fmod_ret progs can also be sleepable if they are
+ * in the fmodret id set with the KF_SLEEPABLE flag.
+ */
+ else {
+ u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
+
+ if (flags && (*flags & KF_SLEEPABLE))
+ ret = 0;
+ }
break;
case BPF_PROG_TYPE_LSM:
/* LSM progs check that they are attached to bpf_lsm_*() funcs.
@@ -16490,7 +16694,10 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
bpf_log(log, "can't modify return codes of BPF programs\n");
return -EINVAL;
}
- ret = check_attach_modify_return(addr, tname);
+ ret = -EINVAL;
+ if (btf_kfunc_is_modify_return(btf, btf_id) ||
+ !check_attach_modify_return(addr, tname))
+ ret = 0;
if (ret) {
bpf_log(log, "%s() is not modifiable\n", tname);
return ret;
@@ -16679,7 +16886,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->allow_ptr_leaks = bpf_allow_ptr_leaks();
env->allow_uninit_stack = bpf_allow_uninit_stack();
- env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c9bfd263dcef..2723623429ac 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -489,7 +489,6 @@ int noinline bpf_fentry_test1(int a)
return a + 1;
}
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
-ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
int noinline bpf_fentry_test2(int a, u64 b)
{
@@ -733,7 +732,15 @@ noinline void bpf_kfunc_call_test_destructive(void)
__diag_pop();
-ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+BTF_SET8_START(bpf_test_modify_return_ids)
+BTF_ID_FLAGS(func, bpf_modify_return_test)
+BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
+BTF_SET8_END(bpf_test_modify_return_ids)
+
+static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_test_modify_return_ids,
+};
BTF_SET8_START(test_sk_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -1666,7 +1673,8 @@ static int __init bpf_prog_test_run_init(void)
};
int ret;
- ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+ ret = register_btf_fmodret_id_set(&bpf_test_modify_return_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set);
return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 9d2288c0736e..bb378c33f542 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -310,7 +310,6 @@ bpf_sk_storage_ptr(void *owner)
return &sk->sk_bpf_storage;
}
-BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops sk_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -321,7 +320,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &sk_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
diff --git a/net/core/dst.c b/net/core/dst.c
index bc9c9be4e080..bb14a0392388 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -316,6 +316,8 @@ void metadata_dst_free(struct metadata_dst *md_dst)
if (md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
+ if (md_dst->type == METADATA_XFRM)
+ dst_release(md_dst->u.xfrm_info.dst_orig);
kfree(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free);
@@ -340,16 +342,18 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
{
-#ifdef CONFIG_DST_CACHE
int cpu;
for_each_possible_cpu(cpu) {
struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+#ifdef CONFIG_DST_CACHE
if (one_md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
- }
#endif
+ if (one_md_dst->type == METADATA_XFRM)
+ dst_release(one_md_dst->u.xfrm_info.dst_orig);
+ }
free_percpu(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/filter.c b/net/core/filter.c
index 37baaa6b8fc3..929358677183 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -80,6 +80,7 @@
#include <net/tls.h>
#include <net/xdp.h>
#include <net/mptcp.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -5630,6 +5631,15 @@ static const struct bpf_func_proto bpf_bind_proto = {
};
#ifdef CONFIG_XFRM
+
+#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+struct metadata_dst __percpu *xfrm_bpf_md_dst;
+EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst);
+
+#endif
+
BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
struct bpf_xfrm_state *, to, u32, size, u64, flags)
{
@@ -7992,6 +8002,19 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
default:
return bpf_sk_base_func_proto(func_id);
}
+
+#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
+ /* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The
+ * kfuncs are defined in two different modules, and we want to be able
+ * to use them interchangably with the same BTF type ID. Because modules
+ * can't de-duplicate BTF IDs between each other, we need the type to be
+ * referenced in the vmlinux BTF or the verifier will get confused about
+ * the different types. So we add this dummy type reference which will
+ * be included in vmlinux BTF, allowing both modules to refer to the
+ * same type ID.
+ */
+ BTF_TYPE_EMIT(struct nf_conn___init);
+#endif
}
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index e6b9ced3eda8..53d0251788aa 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -886,13 +886,16 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
ret = sk_psock_map_verd(ret, msg->sk_redir);
psock->apply_bytes = msg->apply_bytes;
if (ret == __SK_REDIRECT) {
- if (psock->sk_redir)
+ if (psock->sk_redir) {
sock_put(psock->sk_redir);
- psock->sk_redir = msg->sk_redir;
- if (!psock->sk_redir) {
+ psock->sk_redir = NULL;
+ }
+ if (!msg->sk_redir) {
ret = __SK_DROP;
goto out;
}
+ psock->redir_ingress = sk_msg_to_ingress(msg);
+ psock->sk_redir = msg->sk_redir;
sock_hold(psock->sk_redir);
}
out:
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 81beb16ab1eb..22fa2c5bc6ec 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -349,11 +349,13 @@ static void sock_map_free(struct bpf_map *map)
sk = xchg(psk, NULL);
if (sk) {
+ sock_hold(sk);
lock_sock(sk);
rcu_read_lock();
sock_map_unref(sk, psk);
rcu_read_unlock();
release_sock(sk);
+ sock_put(sk);
}
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index cf9c3e8f7ccb..94aad3870c5f 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -45,8 +45,11 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
tmp->sg.end = i;
if (apply) {
apply_bytes -= size;
- if (!apply_bytes)
+ if (!apply_bytes) {
+ if (sge->length)
+ sk_msg_iter_var_prev(i);
break;
+ }
}
} while (i != msg->sg.end);
@@ -131,10 +134,9 @@ static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
return ret;
}
-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
- u32 bytes, int flags)
+int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
+ struct sk_msg *msg, u32 bytes, int flags)
{
- bool ingress = sk_msg_to_ingress(msg);
struct sk_psock *psock = sk_psock_get(sk);
int ret;
@@ -276,10 +278,10 @@ msg_bytes_ready:
static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, int *copied, int flags)
{
- bool cork = false, enospc = sk_msg_full(msg);
+ bool cork = false, enospc = sk_msg_full(msg), redir_ingress;
struct sock *sk_redir;
u32 tosend, origsize, sent, delta = 0;
- u32 eval = __SK_NONE;
+ u32 eval;
int ret;
more_data:
@@ -310,6 +312,7 @@ more_data:
tosend = msg->sg.size;
if (psock->apply_bytes && psock->apply_bytes < tosend)
tosend = psock->apply_bytes;
+ eval = __SK_NONE;
switch (psock->eval) {
case __SK_PASS:
@@ -321,6 +324,7 @@ more_data:
sk_msg_apply_bytes(psock, tosend);
break;
case __SK_REDIRECT:
+ redir_ingress = psock->redir_ingress;
sk_redir = psock->sk_redir;
sk_msg_apply_bytes(psock, tosend);
if (!psock->apply_bytes) {
@@ -337,7 +341,8 @@ more_data:
release_sock(sk);
origsize = msg->sg.size;
- ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+ ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
+ msg, tosend, flags);
sent = origsize - msg->sg.size;
if (eval == __SK_REDIRECT)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 264cf367e265..9ed978634125 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -792,7 +792,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
struct sk_psock *psock;
struct sock *sk_redir;
struct tls_rec *rec;
- bool enospc, policy;
+ bool enospc, policy, redir_ingress;
int err = 0, send;
u32 delta = 0;
@@ -837,6 +837,7 @@ more_data:
}
break;
case __SK_REDIRECT:
+ redir_ingress = psock->redir_ingress;
sk_redir = psock->sk_redir;
memcpy(&msg_redir, msg, sizeof(*msg));
if (msg->apply_bytes < send)
@@ -846,7 +847,8 @@ more_data:
sk_msg_return_zero(sk, msg, send);
msg->sg.size -= send;
release_sock(sk);
- err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
+ err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
+ &msg_redir, send, flags);
lock_sock(sk);
if (err < 0) {
*copied -= sk_msg_free_nocharge(sk, &msg_redir);
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 494aa744bfb9..cd47f88921f5 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -3,6 +3,14 @@
# Makefile for the XFRM subsystem.
#
+xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o
+
+ifeq ($(CONFIG_XFRM_INTERFACE),m)
+xfrm_interface-$(CONFIG_DEBUG_INFO_BTF_MODULES) += xfrm_interface_bpf.o
+else ifeq ($(CONFIG_XFRM_INTERFACE),y)
+xfrm_interface-$(CONFIG_DEBUG_INFO_BTF) += xfrm_interface_bpf.o
+endif
+
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
xfrm_sysctl.o xfrm_replay.o xfrm_device.o
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
new file mode 100644
index 000000000000..1ef2162cebcf
--- /dev/null
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable XFRM Helpers for TC-BPF hook
+ *
+ * These are called from SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+
+#include <net/dst_metadata.h>
+#include <net/xfrm.h>
+
+/* bpf_xfrm_info - XFRM metadata information
+ *
+ * Members:
+ * @if_id - XFRM if_id:
+ * Transmit: if_id to be used in policy and state lookups
+ * Receive: if_id of the state matched for the incoming packet
+ * @link - Underlying device ifindex:
+ * Transmit: used as the underlying device in VRF routing
+ * Receive: the device on which the packet had been received
+ */
+struct bpf_xfrm_info {
+ u32 if_id;
+ int link;
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in xfrm_interface BTF");
+
+/* bpf_skb_get_xfrm_info - Get XFRM metadata
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @to - Pointer to memory to which the metadata will be copied
+ * Cannot be NULL
+ */
+__used noinline
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct xfrm_md_info *info;
+
+ info = skb_xfrm_md_info(skb);
+ if (!info)
+ return -EINVAL;
+
+ to->if_id = info->if_id;
+ to->link = info->link;
+ return 0;
+}
+
+/* bpf_skb_get_xfrm_info - Set XFRM metadata
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @from - Pointer to memory from which the metadata will be copied
+ * Cannot be NULL
+ */
+__used noinline
+int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
+ const struct bpf_xfrm_info *from)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct metadata_dst *md_dst;
+ struct xfrm_md_info *info;
+
+ if (unlikely(skb_metadata_dst(skb)))
+ return -EINVAL;
+
+ if (!xfrm_bpf_md_dst) {
+ struct metadata_dst __percpu *tmp;
+
+ tmp = metadata_dst_alloc_percpu(0, METADATA_XFRM, GFP_ATOMIC);
+ if (!tmp)
+ return -ENOMEM;
+ if (cmpxchg(&xfrm_bpf_md_dst, NULL, tmp))
+ metadata_dst_free_percpu(tmp);
+ }
+ md_dst = this_cpu_ptr(xfrm_bpf_md_dst);
+
+ info = &md_dst->u.xfrm_info;
+
+ info->if_id = from->if_id;
+ info->link = from->link;
+ skb_dst_force(skb);
+ info->dst_orig = skb_dst(skb);
+
+ dst_hold((struct dst_entry *)md_dst);
+ skb_dst_set(skb, (struct dst_entry *)md_dst);
+ return 0;
+}
+
+__diag_pop()
+
+BTF_SET8_START(xfrm_ifc_kfunc_set)
+BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
+BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
+BTF_SET8_END(xfrm_ifc_kfunc_set)
+
+static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &xfrm_ifc_kfunc_set,
+};
+
+int __init register_xfrm_interface_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &xfrm_interface_kfunc_set);
+}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface_core.c
index 5a67b120c4db..1f99dc469027 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -396,6 +396,14 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if_id = md_info->if_id;
fl->flowi_oif = md_info->link;
+ if (md_info->dst_orig) {
+ struct dst_entry *tmp_dst = dst;
+
+ dst = md_info->dst_orig;
+ skb_dst_set(skb, dst);
+ md_info->dst_orig = NULL;
+ dst_release(tmp_dst);
+ }
} else {
if_id = xi->p.if_id;
}
@@ -1162,12 +1170,18 @@ static int __init xfrmi_init(void)
if (err < 0)
goto rtnl_link_failed;
+ err = register_xfrm_interface_bpf();
+ if (err < 0)
+ goto kfunc_failed;
+
lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
xfrm_if_register_cb(&xfrm_if_cb);
return err;
+kfunc_failed:
+ rtnl_link_unregister(&xfrmi_link_ops);
rtnl_link_failed:
xfrmi6_fini();
xfrmi6_failed:
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index fdb0aff8cb5a..e8d90829f23e 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -752,6 +752,7 @@ class PrinterHelpers(Printer):
'struct bpf_timer',
'struct mptcp_sock',
'struct bpf_dynptr',
+ 'const struct bpf_dynptr',
'struct iphdr',
'struct ipv6hdr',
}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c90b756945e3..620032042576 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -501,6 +501,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
if (err) {
p_err("failed to append entry to hashmap for ID %u, path '%s': %s",
pinned_info.id, path, strerror(errno));
+ free(path);
goto out_close;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f89de51a45db..464ca3f01fe7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5293,7 +5293,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5303,7 +5303,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5313,7 +5313,7 @@ union bpf_attr {
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
*
- * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Get a pointer to the underlying dynptr data.
*
@@ -5414,7 +5414,7 @@ union bpf_attr {
* Drain samples from the specified user ring buffer, and invoke
* the provided callback for each such sample:
*
- * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
+ * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
*
* If **callback_fn** returns 0, the helper will continue to try
* and drain the next sample, up to a maximum of
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 0242f31e339c..901d98b865a1 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -673,6 +673,7 @@ enum {
IFLA_XFRM_UNSPEC,
IFLA_XFRM_LINK,
IFLA_XFRM_IF_ID,
+ IFLA_XFRM_COLLECT_METADATA,
__IFLA_XFRM_MAX
};
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4c904ef0b47e..477666f3d496 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -286,3 +286,20 @@ tags:
# Delete partially updated (corrupted) files on error
.DELETE_ON_ERROR:
+
+help:
+ @echo 'libbpf common targets:'
+ @echo ' HINT: use "V=1" to enable verbose build'
+ @echo ' all - build libraries and pkgconfig'
+ @echo ' clean - remove all generated files'
+ @echo ' check - check abi and version info'
+ @echo ''
+ @echo 'libbpf install targets:'
+ @echo ' HINT: use "prefix"(defaults to "/usr/local") or "DESTDIR" (defaults to "/")'
+ @echo ' to adjust target desitantion, e.g. "make prefix=/usr/local install"'
+ @echo ' install - build and install all headers, libraries and pkgconfig'
+ @echo ' install_headers - install only headers to include/bpf'
+ @echo ''
+ @echo 'libbpf make targets:'
+ @echo ' tags - use ctags to make tag information for source code browsing'
+ @echo ' cscope - use cscope to make interactive source code browsing database'
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index a112e0ed1b19..7468978d3c27 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -409,8 +409,15 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
+#ifdef __cplusplus
+/* forward-declaring enums in C++ isn't compatible with pure C enums, so
+ * instead define bpf_enable_stats() as accepting int as an input
+ */
+LIBBPF_API int bpf_enable_stats(int type);
+#else
enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+#endif
struct bpf_prog_bind_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index b8daae265f99..75b411fc2c77 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -1233,6 +1233,14 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", &arg_sz, reg_name, &len) == 2) {
+ /* Memory dereference case without offset, e.g., 8@(%rsp) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
} else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 8e77515d56f6..99cc33c51eaa 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -28,6 +28,7 @@ kfree_skb # attach fentry unexpected erro
kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF
kfunc_call/subprog_lskel # skel unexpected error: -2
kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22
+kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 648a8a1b6b78..585fcf73c731 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -29,6 +29,7 @@ htab_update # failed to attach: ERROR: strerror_r(-
kfree_skb # attach fentry unexpected error: -524 (trampoline)
kfunc_call # 'bpf_prog_active': not found in kernel BTF (?)
kfunc_dynptr_param # JIT does not support calling kernel function (kfunc)
+kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test # relies on fentry
ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?)
ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
@@ -84,3 +85,4 @@ xdp_bonding # failed to auto-attach program 'trace_
xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22)
xdp_synproxy # JIT does not support calling kernel function (kfunc)
+xfrm_info # JIT does not support calling kernel function (kfunc)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6a0f043dc410..c22c43bbee19 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -527,13 +527,15 @@ TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
network_helpers.c testing_helpers.c \
btf_helpers.c flow_dissector_load.h \
- cap_helpers.c
+ cap_helpers.c test_loader.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
$(OUTPUT)/sign-file \
- ima_setup.sh verify_sig_setup.sh \
- $(wildcard progs/btf_dump_test_case_*.c)
+ ima_setup.sh \
+ verify_sig_setup.sh \
+ $(wildcard progs/btf_dump_test_case_*.c) \
+ $(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
$(eval $(call DEFINE_TEST_RUNNER,test_progs))
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 845209581440..bc4555a003a7 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,15 +2,22 @@
#ifndef __BPF_LEGACY__
#define __BPF_LEGACY__
+#if __GNUC__ && !__clang__
+/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions. We
+ * provide the "standard" names as synonyms of the corresponding GCC
+ * builtins. Note how the SKB argument is ignored.
+ */
+#define load_byte(skb, off) __builtin_bpf_load_byte(off)
+#define load_half(skb, off) __builtin_bpf_load_half(off)
+#define load_word(skb, off) __builtin_bpf_load_word(off)
+#else
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
*/
-unsigned long long load_byte(void *skb,
- unsigned long long off) asm("llvm.bpf.load.byte");
-unsigned long long load_half(void *skb,
- unsigned long long off) asm("llvm.bpf.load.half");
-unsigned long long load_word(void *skb,
- unsigned long long off) asm("llvm.bpf.load.word");
+unsigned long long load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word");
+#endif
#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index f9034ea00bc9..612f699dc4f7 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -8,7 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y
CONFIG_BPF_LSM=y
CONFIG_BPF_STREAM_PARSER=y
CONFIG_BPF_SYSCALL=y
-CONFIG_BPF_UNPRIV_DEFAULT_OFF=n
+# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set
CONFIG_CGROUP_BPF=y
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_SHA256=y
@@ -23,6 +23,7 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_IMA=y
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_WRITE_POLICY=y
+CONFIG_INET_ESP=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_RAW=y
CONFIG_IP_NF_TARGET_SYNPROXY=y
@@ -70,7 +71,8 @@ CONFIG_NF_NAT=y
CONFIG_RC_CORE=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
-CONFIG_TEST_BPF=y
+CONFIG_TEST_BPF=m
CONFIG_USERFAULTFD=y
CONFIG_VXLAN=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XFRM_INTERFACE=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 1f37adff7632..01de33191226 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -390,49 +390,6 @@ struct nstoken {
int orig_netns_fd;
};
-static int setns_by_fd(int nsfd)
-{
- int err;
-
- err = setns(nsfd, CLONE_NEWNET);
- close(nsfd);
-
- if (!ASSERT_OK(err, "setns"))
- return err;
-
- /* Switch /sys to the new namespace so that e.g. /sys/class/net
- * reflects the devices in the new namespace.
- */
- err = unshare(CLONE_NEWNS);
- if (!ASSERT_OK(err, "unshare"))
- return err;
-
- /* Make our /sys mount private, so the following umount won't
- * trigger the global umount in case it's shared.
- */
- err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
- if (!ASSERT_OK(err, "remount private /sys"))
- return err;
-
- err = umount2("/sys", MNT_DETACH);
- if (!ASSERT_OK(err, "umount2 /sys"))
- return err;
-
- err = mount("sysfs", "/sys", "sysfs", 0, NULL);
- if (!ASSERT_OK(err, "mount /sys"))
- return err;
-
- err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
- if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
- return err;
-
- err = mount("debugfs", "/sys/kernel/debug", "debugfs", 0, NULL);
- if (!ASSERT_OK(err, "mount /sys/kernel/debug"))
- return err;
-
- return 0;
-}
-
struct nstoken *open_netns(const char *name)
{
int nsfd;
@@ -453,8 +410,9 @@ struct nstoken *open_netns(const char *name)
if (!ASSERT_GE(nsfd, 0, "open netns fd"))
goto fail;
- err = setns_by_fd(nsfd);
- if (!ASSERT_OK(err, "setns_by_fd"))
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ if (!ASSERT_OK(err, "setns"))
goto fail;
return token;
@@ -465,6 +423,7 @@ fail:
void close_netns(struct nstoken *token)
{
- ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+ ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns");
+ close(token->orig_netns_fd);
free(token);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
index 7a277035c275..ef4d6a3ae423 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -9,6 +9,7 @@
#include <string.h>
#include <errno.h>
#include <sched.h>
+#include <net/if.h>
#include <linux/compiler.h>
#include <bpf/libbpf.h>
@@ -20,10 +21,12 @@ static struct test_btf_skc_cls_ingress *skel;
static struct sockaddr_in6 srv_sa6;
static __u32 duration;
-#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
-
static int prepare_netns(void)
{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach,
+ .prog_fd = bpf_program__fd(skel->progs.cls_ingress));
+
if (CHECK(unshare(CLONE_NEWNET), "create netns",
"unshare(CLONE_NEWNET): %s (%d)",
strerror(errno), errno))
@@ -33,12 +36,12 @@ static int prepare_netns(void)
"ip link set dev lo up", "failed\n"))
return -1;
- if (CHECK(system("tc qdisc add dev lo clsact"),
- "tc qdisc add dev lo clsact", "failed\n"))
+ qdisc_lo.ifindex = if_nametoindex("lo");
+ if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
return -1;
- if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
- "install tc cls-prog at ingress", "failed\n"))
+ if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
+ "filter add dev lo ingress"))
return -1;
/* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
@@ -195,19 +198,12 @@ static struct test tests[] = {
void test_btf_skc_cls_ingress(void)
{
- int i, err;
+ int i;
skel = test_btf_skc_cls_ingress__open_and_load();
if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
return;
- err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
- if (CHECK(err, "bpf_program__pin",
- "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
- test_btf_skc_cls_ingress__destroy(skel);
- return;
- }
-
for (i = 0; i < ARRAY_SIZE(tests); i++) {
if (!test__start_subtest(tests[i].desc))
continue;
@@ -221,6 +217,5 @@ void test_btf_skc_cls_ingress(void)
reset_test();
}
- bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
test_btf_skc_cls_ingress__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
index 1c30412ba132..33a2776737e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -10,7 +10,9 @@
#include "cgrp_ls_recursion.skel.h"
#include "cgrp_ls_attach_cgroup.skel.h"
#include "cgrp_ls_negative.skel.h"
+#include "cgrp_ls_sleepable.skel.h"
#include "network_helpers.h"
+#include "cgroup_helpers.h"
struct socket_cookie {
__u64 cookie_key;
@@ -150,14 +152,100 @@ static void test_negative(void)
}
}
+static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct cgrp_ls_sleepable *skel;
+ struct bpf_link *link;
+ int err, iter_fd;
+ char buf[16];
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.cgroup_iter, true);
+ err = cgrp_ls_sleepable__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgroup_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.cgroup_iter, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "iter_create"))
+ goto out;
+
+ /* trigger the program run */
+ (void)read(iter_fd, buf, sizeof(buf));
+
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+
+ close(iter_fd);
+out:
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_no_rcu_lock(__u64 cgroup_id)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = cgrp_ls_sleepable__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+out:
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_rcu_lock(void)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ ASSERT_ERR(err, "skel_load");
+
+ cgrp_ls_sleepable__destroy(skel);
+}
+
void test_cgrp_local_storage(void)
{
+ __u64 cgroup_id;
int cgroup_fd;
cgroup_fd = test__join_cgroup("/cgrp_local_storage");
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage"))
return;
+ cgroup_id = get_cgroup_id("/cgrp_local_storage");
if (test__start_subtest("tp_btf"))
test_tp_btf(cgroup_fd);
if (test__start_subtest("attach_cgroup"))
@@ -166,6 +254,12 @@ void test_cgrp_local_storage(void)
test_recursion(cgroup_fd);
if (test__start_subtest("negative"))
test_negative();
+ if (test__start_subtest("cgroup_iter_sleepable"))
+ test_cgroup_iter_sleepable(cgroup_fd, cgroup_id);
+ if (test__start_subtest("no_rcu_lock"))
+ test_no_rcu_lock(cgroup_id);
+ if (test__start_subtest("rcu_lock"))
+ test_rcu_lock();
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b0c06f821cb8..7faaf6d9e0d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -5,86 +5,16 @@
#include "dynptr_fail.skel.h"
#include "dynptr_success.skel.h"
-static size_t log_buf_sz = 1048576; /* 1 MB */
-static char obj_log_buf[1048576];
-
static struct {
const char *prog_name;
const char *expected_err_msg;
} dynptr_tests[] = {
- /* failure cases */
- {"ringbuf_missing_release1", "Unreleased reference id=1"},
- {"ringbuf_missing_release2", "Unreleased reference id=2"},
- {"ringbuf_missing_release_callback", "Unreleased reference id"},
- {"use_after_invalid", "Expected an initialized dynptr as arg #3"},
- {"ringbuf_invalid_api", "type=mem expected=ringbuf_mem"},
- {"add_dynptr_to_map1", "invalid indirect read from stack"},
- {"add_dynptr_to_map2", "invalid indirect read from stack"},
- {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"},
- {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"},
- {"data_slice_use_after_release1", "invalid mem access 'scalar'"},
- {"data_slice_use_after_release2", "invalid mem access 'scalar'"},
- {"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"},
- {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"},
- {"invalid_helper1", "invalid indirect read from stack"},
- {"invalid_helper2", "Expected an initialized dynptr as arg #3"},
- {"invalid_write1", "Expected an initialized dynptr as arg #1"},
- {"invalid_write2", "Expected an initialized dynptr as arg #3"},
- {"invalid_write3", "Expected an initialized dynptr as arg #1"},
- {"invalid_write4", "arg 1 is an unacquired reference"},
- {"invalid_read1", "invalid read from stack"},
- {"invalid_read2", "cannot pass in dynptr at an offset"},
- {"invalid_read3", "invalid read from stack"},
- {"invalid_read4", "invalid read from stack"},
- {"invalid_offset", "invalid write to stack"},
- {"global", "type=map_value expected=fp"},
- {"release_twice", "arg 1 is an unacquired reference"},
- {"release_twice_callback", "arg 1 is an unacquired reference"},
- {"dynptr_from_mem_invalid_api",
- "Unsupported reg type fp for bpf_dynptr_from_mem data"},
-
/* success cases */
{"test_read_write", NULL},
{"test_data_slice", NULL},
{"test_ringbuf", NULL},
};
-static void verify_fail(const char *prog_name, const char *expected_err_msg)
-{
- LIBBPF_OPTS(bpf_object_open_opts, opts);
- struct bpf_program *prog;
- struct dynptr_fail *skel;
- int err;
-
- opts.kernel_log_buf = obj_log_buf;
- opts.kernel_log_size = log_buf_sz;
- opts.kernel_log_level = 1;
-
- skel = dynptr_fail__open_opts(&opts);
- if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts"))
- goto cleanup;
-
- prog = bpf_object__find_program_by_name(skel->obj, prog_name);
- if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
- goto cleanup;
-
- bpf_program__set_autoload(prog, true);
-
- bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
-
- err = dynptr_fail__load(skel);
- if (!ASSERT_ERR(err, "unexpected load success"))
- goto cleanup;
-
- if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
- fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
- fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
- }
-
-cleanup:
- dynptr_fail__destroy(skel);
-}
-
static void verify_success(const char *prog_name)
{
struct dynptr_success *skel;
@@ -97,8 +27,6 @@ static void verify_success(const char *prog_name)
skel->bss->pid = getpid();
- bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
-
dynptr_success__load(skel);
if (!ASSERT_OK_PTR(skel, "dynptr_success__load"))
goto cleanup;
@@ -129,10 +57,8 @@ void test_dynptr(void)
if (!test__start_subtest(dynptr_tests[i].prog_name))
continue;
- if (dynptr_tests[i].expected_err_msg)
- verify_fail(dynptr_tests[i].prog_name,
- dynptr_tests[i].expected_err_msg);
- else
- verify_success(dynptr_tests[i].prog_name);
+ verify_success(dynptr_tests[i].prog_name);
}
+
+ RUN_TESTS(dynptr_fail);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
index 0613f3bb8b5e..32dd731e9070 100644
--- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -9,7 +9,7 @@
goto out; \
})
-void serial_test_empty_skb(void)
+void test_empty_skb(void)
{
LIBBPF_OPTS(bpf_test_run_opts, tattr);
struct empty_skb *bpf_obj = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
index 55d641c1f126..a9229260a6ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
@@ -18,11 +18,8 @@ static struct {
const char *expected_verifier_err_msg;
int expected_runtime_err;
} kfunc_dynptr_tests[] = {
- {"dynptr_type_not_supp",
- "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0},
- {"not_valid_dynptr",
- "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0},
- {"not_ptr_to_stack", "arg#0 expected pointer to stack", 0},
+ {"not_valid_dynptr", "Expected an initialized dynptr as arg #1", 0},
+ {"not_ptr_to_stack", "arg#0 expected pointer to stack or dynptr_ptr", 0},
{"dynptr_data_null", NULL, -EBADMSG},
};
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 0d66b1524208..3533a4ecad01 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -5,83 +5,6 @@
#include "map_kptr.skel.h"
#include "map_kptr_fail.skel.h"
-static char log_buf[1024 * 1024];
-
-struct {
- const char *prog_name;
- const char *err_msg;
-} map_kptr_fail_tests[] = {
- { "size_not_bpf_dw", "kptr access size must be BPF_DW" },
- { "non_const_var_off", "kptr access cannot have variable offset" },
- { "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. kptr has to be" },
- { "misaligned_access_write", "kptr access misaligned expected=8 off=7" },
- { "misaligned_access_read", "kptr access misaligned expected=8 off=1" },
- { "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" },
- { "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" },
- { "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
- { "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" },
- { "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" },
- { "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" },
- { "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" },
- { "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" },
- { "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" },
- { "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" },
- { "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" },
- { "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
- { "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" },
- { "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" },
- { "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" },
- { "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" },
- { "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" },
- { "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" },
- { "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" },
- { "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" },
-};
-
-static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg)
-{
- LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
- .kernel_log_size = sizeof(log_buf),
- .kernel_log_level = 1);
- struct map_kptr_fail *skel;
- struct bpf_program *prog;
- int ret;
-
- skel = map_kptr_fail__open_opts(&opts);
- if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts"))
- return;
-
- prog = bpf_object__find_program_by_name(skel->obj, prog_name);
- if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
- goto end;
-
- bpf_program__set_autoload(prog, true);
-
- ret = map_kptr_fail__load(skel);
- if (!ASSERT_ERR(ret, "map_kptr__load must fail"))
- goto end;
-
- if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
- fprintf(stderr, "Expected: %s\n", err_msg);
- fprintf(stderr, "Verifier: %s\n", log_buf);
- }
-
-end:
- map_kptr_fail__destroy(skel);
-}
-
-static void test_map_kptr_fail(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) {
- if (!test__start_subtest(map_kptr_fail_tests[i].prog_name))
- continue;
- test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name,
- map_kptr_fail_tests[i].err_msg);
- }
-}
-
static void test_map_kptr_success(bool test_run)
{
LIBBPF_OPTS(bpf_test_run_opts, opts,
@@ -145,5 +68,6 @@ void test_map_kptr(void)
*/
test_map_kptr_success(true);
}
- test_map_kptr_fail();
+
+ RUN_TESTS(map_kptr_fail);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
index ffd8ef4303c8..18848c31e36f 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -103,6 +103,7 @@ static struct {
{"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
{"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"},
{"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
+ {"task_kfunc_from_lsm_task_free", "reg type unsupported for arg#0 function"},
};
static void verify_fail(const char *prog_name, const char *expected_err_msg)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index cb6a53b3e023..bca5e6839ac4 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,12 +11,12 @@
*/
#include <arpa/inet.h>
-#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
#include <linux/time_types.h>
#include <linux/net_tstamp.h>
+#include <net/if.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
@@ -59,10 +59,6 @@
#define IFADDR_STR_LEN 18
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
-#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
-#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
-#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
-
#define TIMEOUT_MILLIS 10000
#define NSEC_PER_SEC 1000000000ULL
@@ -115,7 +111,9 @@ static void netns_setup_namespaces_nofail(const char *verb)
}
struct netns_setup_result {
+ int ifindex_veth_src;
int ifindex_veth_src_fwd;
+ int ifindex_veth_dst;
int ifindex_veth_dst_fwd;
};
@@ -139,27 +137,6 @@ static int get_ifaddr(const char *name, char *ifaddr)
return 0;
}
-static int get_ifindex(const char *name)
-{
- char path[PATH_MAX];
- char buf[32];
- FILE *f;
- int ret;
-
- snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
- f = fopen(path, "r");
- if (!ASSERT_OK_PTR(f, path))
- return -1;
-
- ret = fread(buf, 1, sizeof(buf), f);
- if (!ASSERT_GT(ret, 0, "fread ifindex")) {
- fclose(f);
- return -1;
- }
- fclose(f);
- return atoi(buf);
-}
-
#define SYS(fmt, ...) \
({ \
char cmd[1024]; \
@@ -182,11 +159,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
- result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
- if (result->ifindex_veth_src_fwd < 0)
+ result->ifindex_veth_src = if_nametoindex("veth_src");
+ if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
+ goto fail;
+
+ result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
+ if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
goto fail;
- result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
- if (result->ifindex_veth_dst_fwd < 0)
+
+ result->ifindex_veth_dst = if_nametoindex("veth_dst");
+ if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
+ goto fail;
+
+ result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
+ if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
goto fail;
SYS("ip link set veth_src netns " NS_SRC);
@@ -260,19 +246,78 @@ fail:
return -1;
}
-static int netns_load_bpf(void)
+static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
+{
+ char err_str[128], ifname[16];
+ int err;
+
+ qdisc_hook->ifindex = ifindex;
+ qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(qdisc_hook);
+ snprintf(err_str, sizeof(err_str),
+ "qdisc add dev %s clsact",
+ if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
+ err_str[sizeof(err_str) - 1] = 0;
+ ASSERT_OK(err, err_str);
+
+ return err;
+}
+
+static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
+ enum bpf_tc_attach_point xgress,
+ const struct bpf_program *prog, int priority)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach);
+ char err_str[128], ifname[16];
+ int err;
+
+ qdisc_hook->attach_point = xgress;
+ tc_attach.prog_fd = bpf_program__fd(prog);
+ tc_attach.priority = priority;
+ err = bpf_tc_attach(qdisc_hook, &tc_attach);
+ snprintf(err_str, sizeof(err_str),
+ "filter add dev %s %s prio %d bpf da %s",
+ if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
+ xgress == BPF_TC_INGRESS ? "ingress" : "egress",
+ priority, bpf_program__name(prog));
+ err_str[sizeof(err_str) - 1] = 0;
+ ASSERT_OK(err, err_str);
+
+ return err;
+}
+
+#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \
+ if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \
+ goto fail; \
+})
+
+#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \
+ if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \
+ goto fail; \
+})
+
+static int netns_load_bpf(const struct bpf_program *src_prog,
+ const struct bpf_program *dst_prog,
+ const struct bpf_program *chk_prog,
+ const struct netns_setup_result *setup_result)
{
- SYS("tc qdisc add dev veth_src_fwd clsact");
- SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
- SRC_PROG_PIN_FILE);
- SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
- CHK_PROG_PIN_FILE);
-
- SYS("tc qdisc add dev veth_dst_fwd clsact");
- SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
- DST_PROG_PIN_FILE);
- SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
- CHK_PROG_PIN_FILE);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+ int err;
+
+ /* tc qdisc add dev veth_src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
+ /* tc filter add dev veth_src_fwd ingress bpf da src_prog */
+ XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+ /* tc filter add dev veth_src_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+
+ /* tc qdisc add dev veth_dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
+ /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+ /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
return 0;
fail:
@@ -499,78 +544,79 @@ done:
close(client_fd);
}
-static int netns_load_dtime_bpf(struct test_tc_dtime *skel)
+static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
+ const struct netns_setup_result *setup_result)
{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
struct nstoken *nstoken;
-
-#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file
-#define PIN(__prog) ({ \
- int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \
- if (!ASSERT_OK(err, "pin " #__prog)) \
- goto fail; \
- })
+ int err;
/* setup ns_src tc progs */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return -1;
- PIN(egress_host);
- PIN(ingress_host);
- SYS("tc qdisc add dev veth_src clsact");
- SYS("tc filter add dev veth_src ingress bpf da object-pinned "
- PIN_FNAME(ingress_host));
- SYS("tc filter add dev veth_src egress bpf da object-pinned "
- PIN_FNAME(egress_host));
+ /* tc qdisc add dev veth_src clsact */
+ QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
+ /* tc filter add dev veth_src ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev veth_src egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken);
/* setup ns_dst tc progs */
nstoken = open_netns(NS_DST);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
return -1;
- PIN(egress_host);
- PIN(ingress_host);
- SYS("tc qdisc add dev veth_dst clsact");
- SYS("tc filter add dev veth_dst ingress bpf da object-pinned "
- PIN_FNAME(ingress_host));
- SYS("tc filter add dev veth_dst egress bpf da object-pinned "
- PIN_FNAME(egress_host));
+ /* tc qdisc add dev veth_dst clsact */
+ QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
+ /* tc filter add dev veth_dst ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev veth_dst egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken);
/* setup ns_fwd tc progs */
nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
return -1;
- PIN(ingress_fwdns_prio100);
- PIN(egress_fwdns_prio100);
- PIN(ingress_fwdns_prio101);
- PIN(egress_fwdns_prio101);
- SYS("tc qdisc add dev veth_dst_fwd clsact");
- SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned "
- PIN_FNAME(ingress_fwdns_prio100));
- SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned "
- PIN_FNAME(ingress_fwdns_prio101));
- SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned "
- PIN_FNAME(egress_fwdns_prio100));
- SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned "
- PIN_FNAME(egress_fwdns_prio101));
- SYS("tc qdisc add dev veth_src_fwd clsact");
- SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned "
- PIN_FNAME(ingress_fwdns_prio100));
- SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned "
- PIN_FNAME(ingress_fwdns_prio101));
- SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned "
- PIN_FNAME(egress_fwdns_prio100));
- SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned "
- PIN_FNAME(egress_fwdns_prio101));
+ /* tc qdisc add dev veth_dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
+ /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio100, 100);
+ /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio101, 101);
+ /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio100, 100);
+ /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio101, 101);
+
+ /* tc qdisc add dev veth_src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
+ /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio100, 100);
+ /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio101, 101);
+ /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio100, 100);
+ /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio101, 101);
close_netns(nstoken);
-
-#undef PIN
-
return 0;
fail:
close_netns(nstoken);
- return -1;
+ return err;
}
enum {
@@ -746,7 +792,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
if (!ASSERT_OK(err, "test_tc_dtime__load"))
goto done;
- if (netns_load_dtime_bpf(skel))
+ if (netns_load_dtime_bpf(skel, setup_result))
goto done;
nstoken = open_netns(NS_FWD);
@@ -788,7 +834,6 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
{
struct nstoken *nstoken = NULL;
struct test_tc_neigh_fib *skel = NULL;
- int err;
nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
@@ -801,19 +846,8 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
goto done;
- err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
- goto done;
-
- err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
- goto done;
-
- err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
- goto done;
-
- if (netns_load_bpf())
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
goto done;
/* bpf_fib_lookup() checks if forwarding is enabled */
@@ -849,19 +883,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (!ASSERT_OK(err, "test_tc_neigh__load"))
goto done;
- err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
- goto done;
-
- err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
- goto done;
-
- err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
- goto done;
-
- if (netns_load_bpf())
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
goto done;
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
@@ -896,19 +919,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (!ASSERT_OK(err, "test_tc_peer__load"))
goto done;
- err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
- goto done;
-
- err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
- goto done;
-
- err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
- goto done;
-
- if (netns_load_bpf())
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
goto done;
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
@@ -991,6 +1003,8 @@ static int tun_relay_loop(int src_fd, int target_fd)
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
struct test_tc_peer *skel = NULL;
struct nstoken *nstoken = NULL;
int err;
@@ -1034,8 +1048,8 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
goto fail;
- ifindex = get_ifindex("tun_fwd");
- if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+ ifindex = if_nametoindex("tun_fwd");
+ if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
goto fail;
skel->rodata->IFINDEX_SRC = ifindex;
@@ -1045,31 +1059,21 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
if (!ASSERT_OK(err, "test_tc_peer__load"))
goto fail;
- err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
- goto fail;
-
- err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
- goto fail;
-
- err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
- goto fail;
-
/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
* towards dst, and "tc_dst" to redirect packets
* and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
*/
- SYS("tc qdisc add dev tun_fwd clsact");
- SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
- SRC_PROG_PIN_FILE);
-
- SYS("tc qdisc add dev veth_dst_fwd clsact");
- SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
- DST_PROG_PIN_FILE);
- SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
- CHK_PROG_PIN_FILE);
+ /* tc qdisc add dev tun_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
+ /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
+ XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
+
+ /* tc qdisc add dev veth_dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
+ /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+ /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
+ XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
/* Setup route and neigh tables */
SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
@@ -1134,7 +1138,7 @@ static void *test_tc_redirect_run_tests(void *arg)
return NULL;
}
-void serial_test_tc_redirect(void)
+void test_tc_redirect(void)
{
pthread_t test_thread;
int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index eea274110267..07ad457f3370 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -421,7 +421,7 @@ static void *test_tunnel_run_tests(void *arg)
return NULL;
}
-void serial_test_tunnel(void)
+void test_tunnel(void)
{
pthread_t test_thread;
int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index 02b18d018b36..dae68de285b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -673,9 +673,11 @@ static struct {
{"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"},
{"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"},
{"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"},
- {"user_ringbuf_callback_discard_dynptr", "arg 1 is an unacquired reference"},
- {"user_ringbuf_callback_submit_dynptr", "arg 1 is an unacquired reference"},
+ {"user_ringbuf_callback_discard_dynptr", "cannot release unowned const bpf_dynptr"},
+ {"user_ringbuf_callback_submit_dynptr", "cannot release unowned const bpf_dynptr"},
{"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"},
+ {"user_ringbuf_callback_reinit_dynptr_mem", "Dynptr has to be an uninitialized dynptr"},
+ {"user_ringbuf_callback_reinit_dynptr_ringbuf", "Dynptr has to be an uninitialized dynptr"},
};
#define SUCCESS_TEST(_func) { _func, #_func }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 9ac6f6a268db..a50971c6cf4a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -85,7 +85,7 @@ static void test_max_pkt_size(int fd)
}
#define NUM_PKTS 10000
-void serial_test_xdp_do_redirect(void)
+void test_xdp_do_redirect(void)
{
int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
char data[sizeof(pkt_udp) + sizeof(__u32)];
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
index 13daa3746064..c72083885b6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -174,7 +174,7 @@ out:
system("ip netns del synproxy");
}
-void serial_test_xdp_synproxy(void)
+void test_xdp_synproxy(void)
{
if (test__start_subtest("xdp"))
test_synproxy(true);
diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
new file mode 100644
index 000000000000..8b03c9bb4862
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS0 namespace | NS1 namespace | NS2 namespace
+ * | |
+ * +---------------+ | +---------------+ |
+ * | ipsec0 |---------| ipsec0 | |
+ * | 192.168.1.100 | | | 192.168.1.200 | |
+ * | if_id: bpf | | +---------------+ |
+ * +---------------+ | |
+ * | | | +---------------+
+ * | | | | ipsec0 |
+ * \------------------------------------------| 192.168.1.200 |
+ * | | +---------------+
+ * | |
+ * | | (overlay network)
+ * ------------------------------------------------------
+ * | | (underlay network)
+ * +--------------+ | +--------------+ |
+ * | veth01 |----------| veth10 | |
+ * | 172.16.1.100 | | | 172.16.1.200 | |
+ * ---------------+ | +--------------+ |
+ * | |
+ * +--------------+ | | +--------------+
+ * | veth02 |-----------------------------------| veth20 |
+ * | 172.16.2.100 | | | | 172.16.2.200 |
+ * +--------------+ | | +--------------+
+ *
+ *
+ * Test Packet flow
+ * -----------
+ * The tests perform 'ping 192.168.1.200' from the NS0 namespace:
+ * 1) request is routed to NS0 ipsec0
+ * 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based
+ * on the requested value. This makes the ipsec0 device in external mode
+ * select the destination tunnel
+ * 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was
+ * used) and response is sent
+ * 4) response is received on NS0 ipsec0, tc ingress program is triggered and
+ * records the response if_id
+ * 5) requested if_id is compared with received if_id
+ */
+
+#include <net/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_link.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "xfrm_info.skel.h"
+
+#define NS0 "xfrm_test_ns0"
+#define NS1 "xfrm_test_ns1"
+#define NS2 "xfrm_test_ns2"
+
+#define IF_ID_0_TO_1 1
+#define IF_ID_0_TO_2 2
+#define IF_ID_1 3
+#define IF_ID_2 4
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define ESP_DUMMY_PARAMS \
+ "proto esp aead 'rfc4106(gcm(aes))' " \
+ "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel "
+
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto fail; \
+ })
+
+#define SYS_NOFAIL(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ system(cmd); \
+ })
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+{
+ LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1,
+ .prog_fd = igr_fd);
+ LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1,
+ .prog_fd = egr_fd);
+ int ret;
+
+ ret = bpf_tc_hook_create(hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (igr_fd >= 0) {
+ hook->attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ if (egr_fd >= 0) {
+ hook->attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1);
+ SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2);
+}
+
+static int config_underlay(void)
+{
+ SYS("ip netns add " NS0);
+ SYS("ip netns add " NS1);
+ SYS("ip netns add " NS2);
+
+ /* NS0 <-> NS1 [veth01 <-> veth10] */
+ SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
+ SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS("ip -net " NS0 " link set dev veth01 up");
+ SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS("ip -net " NS1 " link set dev veth10 up");
+
+ /* NS0 <-> NS2 [veth02 <-> veth20] */
+ SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
+ SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS("ip -net " NS0 " link set dev veth02 up");
+ SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS("ip -net " NS2 " link set dev veth20 up");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local,
+ const char *ipv4_remote, int if_id)
+{
+ /* State: local -> remote */
+ SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
+ ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id);
+
+ /* State: local <- remote */
+ SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
+ ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id);
+
+ /* Policy: local -> remote */
+ SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
+ "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
+ if_id, ipv4_local, ipv4_remote, if_id);
+
+ /* Policy: local <- remote */
+ SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
+ "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
+ if_id, ipv4_remote, ipv4_local, if_id);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b,
+ const char *ipv4_a, const char *ipv4_b,
+ int if_id_a, int if_id_b)
+{
+ return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) ||
+ setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b);
+}
+
+static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
+ unsigned short len)
+{
+ struct rtattr *rta =
+ (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
+ rta->rta_type = type;
+ rta->rta_len = RTA_LENGTH(len);
+ nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
+ return rta;
+}
+
+static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
+ const char *s)
+{
+ struct rtattr *rta = rtattr_add(nh, type, strlen(s));
+
+ memcpy(RTA_DATA(rta), s, strlen(s));
+ return rta;
+}
+
+static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
+{
+ return rtattr_add(nh, type, 0);
+}
+
+static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
+
+ attr->rta_len = end - (uint8_t *)attr;
+}
+
+static int setup_xfrmi_external_dev(const char *ns)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ unsigned char data[128];
+ } req;
+ struct rtattr *link_info, *info_data;
+ struct nstoken *nstoken;
+ int ret = -1, sock = -1;
+ struct nlmsghdr *nh;
+
+ memset(&req, 0, sizeof(req));
+ nh = &req.nh;
+ nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ nh->nlmsg_type = RTM_NEWLINK;
+ nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST;
+
+ rtattr_add_str(nh, IFLA_IFNAME, "ipsec0");
+ link_info = rtattr_begin(nh, IFLA_LINKINFO);
+ rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm");
+ info_data = rtattr_begin(nh, IFLA_INFO_DATA);
+ rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0);
+ rtattr_end(nh, info_data);
+ rtattr_end(nh, link_info);
+
+ nstoken = open_netns(ns);
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto done;
+
+ sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+ if (!ASSERT_GE(sock, 0, "netlink socket"))
+ goto done;
+ ret = send(sock, nh, nh->nlmsg_len, 0);
+ if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length"))
+ goto done;
+
+ ret = 0;
+done:
+ if (sock != -1)
+ close(sock);
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int config_overlay(void)
+{
+ if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10,
+ IF_ID_0_TO_1, IF_ID_1))
+ goto fail;
+ if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20,
+ IF_ID_0_TO_2, IF_ID_2))
+ goto fail;
+
+ /* Older iproute2 doesn't support this option */
+ if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi"))
+ goto fail;
+
+ SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
+ SYS("ip -net " NS0 " link set dev ipsec0 up");
+
+ SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
+ SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS("ip -net " NS1 " link set dev ipsec0 up");
+
+ SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
+ SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS("ip -net " NS2 " link set dev ipsec0 up");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id)
+{
+ skel->bss->req_if_id = if_id;
+
+ SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
+
+ if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id"))
+ goto fail;
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void _test_xfrm_info(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd;
+ struct nstoken *nstoken = NULL;
+ struct xfrm_info *skel;
+ int ifindex;
+
+ /* load and attach bpf progs to ipsec dev tc hook point */
+ skel = xfrm_info__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load"))
+ goto done;
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto done;
+ ifindex = if_nametoindex("ipsec0");
+ if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info);
+ get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info);
+ if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd,
+ set_xfrm_info_prog_fd))
+ goto done;
+
+ /* perform test */
+ if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1))
+ goto done;
+ if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2))
+ goto done;
+
+done:
+ if (nstoken)
+ close_netns(nstoken);
+ xfrm_info__destroy(skel);
+}
+
+void test_xfrm_info(void)
+{
+ cleanup();
+
+ if (!ASSERT_OK(config_underlay(), "config_underlay"))
+ goto done;
+ if (!ASSERT_OK(config_overlay(), "config_overlay"))
+ goto done;
+
+ if (test__start_subtest("xfrm_info"))
+ _test_xfrm_info();
+
+done:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
index 285c008cbf9c..9ba14c37bbcc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
@@ -7,14 +7,14 @@ char _license[] SEC("license") = "GPL";
unsigned long last_sym_value = 0;
-static inline char tolower(char c)
+static inline char to_lower(char c)
{
if (c >= 'A' && c <= 'Z')
c += ('a' - 'A');
return c;
}
-static inline char toupper(char c)
+static inline char to_upper(char c)
{
if (c >= 'a' && c <= 'z')
c -= ('a' - 'A');
@@ -54,7 +54,7 @@ int dump_ksym(struct bpf_iter__ksym *ctx)
type = iter->type;
if (iter->module_name[0]) {
- type = iter->exported ? toupper(type) : tolower(type);
+ type = iter->exported ? to_upper(type) : to_lower(type);
BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ",
value, type, iter->name, iter->module_name);
} else {
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 5bb11fe595a4..4a01ea9113bf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -2,6 +2,11 @@
#ifndef __BPF_MISC_H__
#define __BPF_MISC_H__
+#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
+#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
+#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
+#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
+
#if defined(__TARGET_ARCH_x86)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__x64_"
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index adb087aecc9e..b394817126cf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -25,6 +25,9 @@
#define IPV6_TCLASS 67
#define IPV6_AUTOFLOWLABEL 70
+#define TC_ACT_UNSPEC (-1)
+#define TC_ACT_SHOT 2
+
#define SOL_TCP 6
#define TCP_NODELAY 1
#define TCP_MAXSEG 2
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 8feddb8289cf..38f78d9345de 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -64,3 +64,4 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
return 0;
}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
new file mode 100644
index 000000000000..2d11ed528b6f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+__u32 target_pid;
+__u64 cgroup_id;
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?iter.s/cgroup")
+int cgroup_iter(struct bpf_iter__cgroup *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct cgroup *cgrp = ctx->cgroup;
+ long *ptr;
+
+ if (cgrp == NULL)
+ return 0;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int no_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* ptr_to_btf_id semantics. should work. */
+ cgrp = task->cgroups->dfl_cgrp;
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int yes_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ cgrp = task->cgroups->dfl_cgrp;
+ /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index b0f08ff024fb..78debc1b3820 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -43,6 +43,7 @@ struct sample {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
} ringbuf SEC(".maps");
int err, val;
@@ -66,6 +67,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr)
* bpf_ringbuf_submit/discard_dynptr call
*/
SEC("?raw_tp")
+__failure __msg("Unreleased reference id=1")
int ringbuf_missing_release1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -78,6 +80,7 @@ int ringbuf_missing_release1(void *ctx)
}
SEC("?raw_tp")
+__failure __msg("Unreleased reference id=2")
int ringbuf_missing_release2(void *ctx)
{
struct bpf_dynptr ptr1, ptr2;
@@ -113,6 +116,7 @@ static int missing_release_callback_fn(__u32 index, void *data)
/* Any dynptr initialized within a callback must have bpf_dynptr_put called */
SEC("?raw_tp")
+__failure __msg("Unreleased reference id")
int ringbuf_missing_release_callback(void *ctx)
{
bpf_loop(10, missing_release_callback_fn, NULL, 0);
@@ -121,6 +125,7 @@ int ringbuf_missing_release_callback(void *ctx)
/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
int ringbuf_release_uninit_dynptr(void *ctx)
{
struct bpf_dynptr ptr;
@@ -133,6 +138,7 @@ int ringbuf_release_uninit_dynptr(void *ctx)
/* A dynptr can't be used after it has been invalidated */
SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
int use_after_invalid(void *ctx)
{
struct bpf_dynptr ptr;
@@ -152,6 +158,7 @@ int use_after_invalid(void *ctx)
/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
SEC("?raw_tp")
+__failure __msg("type=mem expected=ringbuf_mem")
int ringbuf_invalid_api(void *ctx)
{
struct bpf_dynptr ptr;
@@ -174,6 +181,7 @@ done:
/* Can't add a dynptr to a map */
SEC("?raw_tp")
+__failure __msg("invalid indirect read from stack")
int add_dynptr_to_map1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -191,6 +199,7 @@ int add_dynptr_to_map1(void *ctx)
/* Can't add a struct with an embedded dynptr to a map */
SEC("?raw_tp")
+__failure __msg("invalid indirect read from stack")
int add_dynptr_to_map2(void *ctx)
{
struct test_info x;
@@ -208,6 +217,7 @@ int add_dynptr_to_map2(void *ctx)
/* A data slice can't be accessed out of bounds */
SEC("?raw_tp")
+__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
@@ -228,6 +238,7 @@ done:
}
SEC("?raw_tp")
+__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
{
__u32 key = 0, map_val;
@@ -248,6 +259,7 @@ int data_slice_out_of_bounds_map_value(void *ctx)
/* A data slice can't be used after it has been released */
SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
int data_slice_use_after_release1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -279,6 +291,7 @@ done:
* ptr2 is at fp - 16).
*/
SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
int data_slice_use_after_release2(void *ctx)
{
struct bpf_dynptr ptr1, ptr2;
@@ -310,6 +323,7 @@ done:
/* A data slice must be first checked for NULL */
SEC("?raw_tp")
+__failure __msg("invalid mem access 'mem_or_null'")
int data_slice_missing_null_check1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -330,6 +344,7 @@ int data_slice_missing_null_check1(void *ctx)
/* A data slice can't be dereferenced if it wasn't checked for null */
SEC("?raw_tp")
+__failure __msg("invalid mem access 'mem_or_null'")
int data_slice_missing_null_check2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -352,6 +367,7 @@ done:
* dynptr argument
*/
SEC("?raw_tp")
+__failure __msg("invalid indirect read from stack")
int invalid_helper1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -366,6 +382,7 @@ int invalid_helper1(void *ctx)
/* A dynptr can't be passed into a helper function at a non-zero offset */
SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
int invalid_helper2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -381,6 +398,7 @@ int invalid_helper2(void *ctx)
/* A bpf_dynptr is invalidated if it's been written into */
SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
int invalid_write1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -402,6 +420,7 @@ int invalid_write1(void *ctx)
* offset
*/
SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
int invalid_write2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -425,6 +444,7 @@ int invalid_write2(void *ctx)
* non-const offset
*/
SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
int invalid_write3(void *ctx)
{
struct bpf_dynptr ptr;
@@ -456,6 +476,7 @@ static int invalid_write4_callback(__u32 index, void *data)
* be invalidated as a dynptr
*/
SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
int invalid_write4(void *ctx)
{
struct bpf_dynptr ptr;
@@ -472,7 +493,9 @@ int invalid_write4(void *ctx)
/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */
struct bpf_dynptr global_dynptr;
+
SEC("?raw_tp")
+__failure __msg("type=map_value expected=fp")
int global(void *ctx)
{
/* this should fail */
@@ -485,6 +508,7 @@ int global(void *ctx)
/* A direct read should fail */
SEC("?raw_tp")
+__failure __msg("invalid read from stack")
int invalid_read1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -501,6 +525,7 @@ int invalid_read1(void *ctx)
/* A direct read at an offset should fail */
SEC("?raw_tp")
+__failure __msg("cannot pass in dynptr at an offset")
int invalid_read2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -516,6 +541,7 @@ int invalid_read2(void *ctx)
/* A direct read at an offset into the lower stack slot should fail */
SEC("?raw_tp")
+__failure __msg("invalid read from stack")
int invalid_read3(void *ctx)
{
struct bpf_dynptr ptr1, ptr2;
@@ -542,6 +568,7 @@ static int invalid_read4_callback(__u32 index, void *data)
/* A direct read within a callback function should fail */
SEC("?raw_tp")
+__failure __msg("invalid read from stack")
int invalid_read4(void *ctx)
{
struct bpf_dynptr ptr;
@@ -557,6 +584,7 @@ int invalid_read4(void *ctx)
/* Initializing a dynptr on an offset should fail */
SEC("?raw_tp")
+__failure __msg("invalid write to stack")
int invalid_offset(void *ctx)
{
struct bpf_dynptr ptr;
@@ -571,6 +599,7 @@ int invalid_offset(void *ctx)
/* Can't release a dynptr twice */
SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
int release_twice(void *ctx)
{
struct bpf_dynptr ptr;
@@ -597,6 +626,7 @@ static int release_twice_callback_fn(__u32 index, void *data)
* within a calback function, fails
*/
SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
int release_twice_callback(void *ctx)
{
struct bpf_dynptr ptr;
@@ -612,6 +642,7 @@ int release_twice_callback(void *ctx)
/* Reject unsupported local mem types for dynptr_from_mem API */
SEC("?raw_tp")
+__failure __msg("Unsupported reg type fp for bpf_dynptr_from_mem data")
int dynptr_from_mem_invalid_api(void *ctx)
{
struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index a3a6103c8569..35db7c6c1fc7 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -20,6 +20,7 @@ struct sample {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
} ringbuf SEC(".maps");
struct {
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
index 2c7b615c6d41..4ad88da5cda2 100644
--- a/tools/testing/selftests/bpf/progs/linked_list.c
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -99,13 +99,28 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea
struct foo *f[8], *pf;
int i;
- for (i = 0; i < ARRAY_SIZE(f); i++) {
+ /* Loop following this check adds nodes 2-at-a-time in order to
+ * validate multiple release_on_unlock release logic
+ */
+ if (ARRAY_SIZE(f) % 2)
+ return 10;
+
+ for (i = 0; i < ARRAY_SIZE(f); i += 2) {
f[i] = bpf_obj_new(typeof(**f));
if (!f[i])
return 2;
f[i]->data = i;
+
+ f[i + 1] = bpf_obj_new(typeof(**f));
+ if (!f[i + 1]) {
+ bpf_obj_drop(f[i]);
+ return 9;
+ }
+ f[i + 1]->data = i + 1;
+
bpf_spin_lock(lock);
bpf_list_push_front(head, &f[i]->node);
+ bpf_list_push_front(head, &f[i + 1]->node);
bpf_spin_unlock(lock);
}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 05e209b1b12a..760e41e1a632 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -3,6 +3,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
struct map_value {
char buf[8];
@@ -23,6 +24,7 @@ extern struct prog_test_ref_kfunc *
bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
SEC("?tc")
+__failure __msg("kptr access size must be BPF_DW")
int size_not_bpf_dw(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -37,6 +39,7 @@ int size_not_bpf_dw(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("kptr access cannot have variable offset")
int non_const_var_off(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -55,6 +58,7 @@ int non_const_var_off(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("R1 doesn't have constant offset. kptr has to be")
int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -73,6 +77,7 @@ int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("kptr access misaligned expected=8 off=7")
int misaligned_access_write(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -88,6 +93,7 @@ int misaligned_access_write(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("kptr access misaligned expected=8 off=1")
int misaligned_access_read(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -101,6 +107,7 @@ int misaligned_access_read(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("variable untrusted_ptr_ access var_off=(0x0; 0x1e0)")
int reject_var_off_store(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *unref_ptr;
@@ -124,6 +131,7 @@ int reject_var_off_store(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc")
int reject_bad_type_match(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *unref_ptr;
@@ -144,6 +152,7 @@ int reject_bad_type_match(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
int marked_as_untrusted_or_null(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -158,6 +167,7 @@ int marked_as_untrusted_or_null(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("access beyond struct prog_test_ref_kfunc at off 32 size 4")
int correct_btf_id_check_size(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *p;
@@ -175,6 +185,7 @@ int correct_btf_id_check_size(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("R1 type=untrusted_ptr_ expected=percpu_ptr_")
int inherit_untrusted_on_walk(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *unref_ptr;
@@ -194,6 +205,7 @@ int inherit_untrusted_on_walk(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("off=8 kptr isn't referenced kptr")
int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -208,6 +220,7 @@ int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("arg#0 expected pointer to map value")
int reject_kptr_get_no_map_val(struct __sk_buff *ctx)
{
bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0);
@@ -215,6 +228,7 @@ int reject_kptr_get_no_map_val(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("arg#0 expected pointer to map value")
int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx)
{
bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0);
@@ -222,6 +236,7 @@ int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("arg#0 no referenced kptr at map value offset=0")
int reject_kptr_get_no_kptr(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -236,6 +251,7 @@ int reject_kptr_get_no_kptr(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("arg#0 no referenced kptr at map value offset=8")
int reject_kptr_get_on_unref(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -250,6 +266,7 @@ int reject_kptr_get_on_unref(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("kernel function bpf_kfunc_call_test_kptr_get args#0")
int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -264,6 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -278,6 +296,7 @@ int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("store to referenced kptr disallowed")
int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *p;
@@ -297,6 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("R2 type=untrusted_ptr_ expected=ptr_")
int reject_untrusted_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *p;
@@ -315,6 +335,8 @@ int reject_untrusted_xchg(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure
+__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
int reject_bad_type_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *ref_ptr;
@@ -333,6 +355,7 @@ int reject_bad_type_xchg(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc")
int reject_member_of_ref_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *ref_ptr;
@@ -351,6 +374,7 @@ int reject_member_of_ref_xchg(struct __sk_buff *ctx)
}
SEC("?syscall")
+__failure __msg("kptr cannot be accessed indirectly by helper")
int reject_indirect_helper_access(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -371,6 +395,7 @@ int write_func(int *p)
}
SEC("?tc")
+__failure __msg("kptr cannot be accessed indirectly by helper")
int reject_indirect_global_func_access(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -384,6 +409,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("Unreleased reference id=5 alloc_insn=")
int kptr_xchg_ref_state(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *p;
@@ -402,6 +428,7 @@ int kptr_xchg_ref_state(struct __sk_buff *ctx)
}
SEC("?tc")
+__failure __msg("Unreleased reference id=3 alloc_insn=")
int kptr_get_ref_state(struct __sk_buff *ctx)
{
struct map_value *v;
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 94a970076b98..125f908024d3 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -23,13 +23,14 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
-struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int get_cgroup_id(void *ctx)
{
struct task_struct *task;
+ struct css_set *cgroups;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
@@ -37,7 +38,11 @@ int get_cgroup_id(void *ctx)
/* simulate bpf_get_current_cgroup_id() helper */
bpf_rcu_read_lock();
- cgroup_id = task->cgroups->dfl_cgrp->kn->id;
+ cgroups = task->cgroups;
+ if (!cgroups)
+ goto unlock;
+ cgroup_id = cgroups->dfl_cgrp->kn->id;
+unlock:
bpf_rcu_read_unlock();
return 0;
}
@@ -56,6 +61,8 @@ int task_succ(void *ctx)
bpf_rcu_read_lock();
/* region including helper using rcu ptr real_parent */
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
ptr = bpf_task_storage_get(&map_a, real_parent, &init_val,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
@@ -92,7 +99,10 @@ int two_regions(void *ctx)
bpf_rcu_read_unlock();
bpf_rcu_read_lock();
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
bpf_rcu_read_unlock();
return 0;
}
@@ -105,7 +115,10 @@ int non_sleepable_1(void *ctx)
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
bpf_rcu_read_unlock();
return 0;
}
@@ -121,7 +134,10 @@ int non_sleepable_2(void *ctx)
bpf_rcu_read_lock();
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
bpf_rcu_read_unlock();
return 0;
}
@@ -129,16 +145,33 @@ int non_sleepable_2(void *ctx)
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
int task_acquire(void *ctx)
{
- struct task_struct *task, *real_parent;
+ struct task_struct *task, *real_parent, *gparent;
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+
+ /* rcu_ptr->rcu_field */
+ gparent = real_parent->real_parent;
+ if (!gparent)
+ goto out;
+
/* acquire a reference which can be used outside rcu read lock region */
- real_parent = bpf_task_acquire(real_parent);
+ gparent = bpf_task_acquire_not_zero(gparent);
+ if (!gparent)
+ /* Until we resolve the issues with using task->rcu_users, we
+ * expect bpf_task_acquire_not_zero() to return a NULL task.
+ * See the comment at the definition of
+ * bpf_task_acquire_not_zero() for more details.
+ */
+ goto out;
+
+ (void)bpf_task_storage_get(&map_a, gparent, 0, 0);
+ bpf_task_release(gparent);
+out:
bpf_rcu_read_unlock();
- (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
- bpf_task_release(real_parent);
return 0;
}
@@ -181,9 +214,12 @@ int non_sleepable_rcu_mismatch(void *ctx)
/* non-sleepable: missing bpf_rcu_read_unlock() in one path */
bpf_rcu_read_lock();
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
if (real_parent)
bpf_rcu_read_unlock();
+out:
return 0;
}
@@ -199,16 +235,17 @@ int inproper_sleepable_helper(void *ctx)
/* sleepable helper in rcu read lock region */
bpf_rcu_read_lock();
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
- if (!regs) {
- bpf_rcu_read_unlock();
- return 0;
- }
+ if (!regs)
+ goto out;
ptr = (void *)PT_REGS_IP(regs);
(void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
user_data = value;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
bpf_rcu_read_unlock();
return 0;
}
@@ -239,7 +276,10 @@ int nested_rcu_region(void *ctx)
bpf_rcu_read_lock();
bpf_rcu_read_lock();
real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
bpf_rcu_read_unlock();
bpf_rcu_read_unlock();
return 0;
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index e310473190d5..87fa1db9d9b5 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -271,3 +271,14 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl
return 0;
}
+
+SEC("lsm/task_free")
+int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
+{
+ struct task_struct *acquired;
+
+ /* the argument of lsm task_free hook is untrusted. */
+ acquired = bpf_task_acquire(task);
+ bpf_task_release(acquired);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
index 60c7ead41cfc..9f359cfd29e7 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -123,12 +123,17 @@ int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
}
kptr = bpf_task_kptr_get(&v->task);
- if (!kptr) {
+ if (kptr) {
+ /* Until we resolve the issues with using task->rcu_users, we
+ * expect bpf_task_kptr_get() to return a NULL task. See the
+ * comment at the definition of bpf_task_acquire_not_zero() for
+ * more details.
+ */
+ bpf_task_release(kptr);
err = 3;
return 0;
}
- bpf_task_release(kptr);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index ce39d096bba3..f4a8250329b2 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -33,18 +33,6 @@ int err, pid;
char _license[] SEC("license") = "GPL";
SEC("?lsm.s/bpf")
-int BPF_PROG(dynptr_type_not_supp, int cmd, union bpf_attr *attr,
- unsigned int size)
-{
- char write_data[64] = "hello there, world!!";
- struct bpf_dynptr ptr;
-
- bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
-
- return bpf_verify_pkcs7_signature(&ptr, &ptr, NULL);
-}
-
-SEC("?lsm.s/bpf")
int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
{
unsigned long val;
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
index 82aba4529aa9..f3201dc69a60 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -18,6 +18,13 @@ struct {
__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
} user_ringbuf SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 2);
+} ringbuf SEC(".maps");
+
+static int map_value;
+
static long
bad_access1(struct bpf_dynptr *dynptr, void *context)
{
@@ -32,7 +39,7 @@ bad_access1(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to read before the pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_bad_access1(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0);
@@ -54,7 +61,7 @@ bad_access2(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to read past the end of the pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_bad_access2(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0);
@@ -73,7 +80,7 @@ write_forbidden(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to write to that pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_write_forbidden(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0);
@@ -92,7 +99,7 @@ null_context_write(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to write to that pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_null_context_write(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0);
@@ -113,7 +120,7 @@ null_context_read(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to write to that pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_null_context_read(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0);
@@ -132,7 +139,7 @@ try_discard_dynptr(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to read past the end of the pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_discard_dynptr(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0);
@@ -151,7 +158,7 @@ try_submit_dynptr(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to read past the end of the pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_submit_dynptr(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0);
@@ -168,10 +175,38 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context)
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
* not be able to write to that pointer.
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp/")
int user_ringbuf_callback_invalid_return(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0);
return 0;
}
+
+static long
+try_reinit_dynptr_mem(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_dynptr_from_mem(&map_value, 4, 0, dynptr);
+ return 0;
+}
+
+static long
+try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr);
+ return 0;
+}
+
+SEC("?raw_tp/")
+int user_ringbuf_callback_reinit_dynptr_mem(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0);
+ return 0;
+}
+
+SEC("?raw_tp/")
+int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c
new file mode 100644
index 000000000000..f6a501fbba2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xfrm_info.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+
+struct bpf_xfrm_info___local {
+ u32 if_id;
+ int link;
+} __attribute__((preserve_access_index));
+
+__u32 req_if_id;
+__u32 resp_if_id;
+
+int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
+ const struct bpf_xfrm_info___local *from) __ksym;
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx,
+ struct bpf_xfrm_info___local *to) __ksym;
+
+SEC("tc")
+int set_xfrm_info(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_info___local info = { .if_id = req_if_id };
+
+ return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC;
+}
+
+SEC("tc")
+int get_xfrm_info(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_info___local info = {};
+
+ if (bpf_skb_get_xfrm_info(skb, &info) < 0)
+ return TC_ACT_SHOT;
+
+ resp_if_id = info.if_id;
+
+ return TC_ACT_UNSPEC;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index 19ad172036da..0bd9990e83fa 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#include <iostream>
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
#include <bpf/libbpf.h>
-#pragma GCC diagnostic pop
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include "test_core_extern.skel.h"
@@ -99,6 +99,7 @@ int main(int argc, char *argv[])
struct btf_dump_opts opts = { };
struct test_core_extern *skel;
struct btf *btf;
+ int fd;
try_skeleton_template();
@@ -117,6 +118,12 @@ int main(int argc, char *argv[])
skel = test_core_extern__open_and_load();
test_core_extern__destroy(skel);
+ fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (fd < 0)
+ std::cout << "FAILED to enable stats: " << fd << std::endl;
+ else
+ ::close(fd);
+
std::cout << "DONE!" << std::endl;
return 0;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
new file mode 100644
index 000000000000..679efb3aa785
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <stdlib.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#define str_has_pfx(str, pfx) \
+ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
+
+#define TEST_LOADER_LOG_BUF_SZ 1048576
+
+#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
+#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
+#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
+
+struct test_spec {
+ const char *name;
+ bool expect_failure;
+ const char *expect_msg;
+ int log_level;
+};
+
+static int tester_init(struct test_loader *tester)
+{
+ if (!tester->log_buf) {
+ tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ;
+ tester->log_buf = malloc(tester->log_buf_sz);
+ if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf"))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void test_loader_fini(struct test_loader *tester)
+{
+ if (!tester)
+ return;
+
+ free(tester->log_buf);
+}
+
+static int parse_test_spec(struct test_loader *tester,
+ struct bpf_object *obj,
+ struct bpf_program *prog,
+ struct test_spec *spec)
+{
+ struct btf *btf;
+ int func_id, i;
+
+ memset(spec, 0, sizeof(*spec));
+
+ spec->name = bpf_program__name(prog);
+
+ btf = bpf_object__btf(obj);
+ if (!btf) {
+ ASSERT_FAIL("BPF object has no BTF");
+ return -EINVAL;
+ }
+
+ func_id = btf__find_by_name_kind(btf, spec->name, BTF_KIND_FUNC);
+ if (func_id < 0) {
+ ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->name);
+ return -EINVAL;
+ }
+
+ for (i = 1; i < btf__type_cnt(btf); i++) {
+ const struct btf_type *t;
+ const char *s;
+
+ t = btf__type_by_id(btf, i);
+ if (!btf_is_decl_tag(t))
+ continue;
+
+ if (t->type != func_id || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ s = btf__str_by_offset(btf, t->name_off);
+ if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) {
+ spec->expect_failure = true;
+ } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
+ spec->expect_failure = false;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
+ spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
+ } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
+ errno = 0;
+ spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0);
+ if (errno) {
+ ASSERT_FAIL("failed to parse test log level from '%s'", s);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void prepare_case(struct test_loader *tester,
+ struct test_spec *spec,
+ struct bpf_object *obj,
+ struct bpf_program *prog)
+{
+ int min_log_level = 0;
+
+ if (env.verbosity > VERBOSE_NONE)
+ min_log_level = 1;
+ if (env.verbosity > VERBOSE_VERY)
+ min_log_level = 2;
+
+ bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz);
+
+ /* Make sure we set at least minimal log level, unless test requirest
+ * even higher level already. Make sure to preserve independent log
+ * level 4 (verifier stats), though.
+ */
+ if ((spec->log_level & 3) < min_log_level)
+ bpf_program__set_log_level(prog, (spec->log_level & 4) | min_log_level);
+ else
+ bpf_program__set_log_level(prog, spec->log_level);
+
+ tester->log_buf[0] = '\0';
+}
+
+static void emit_verifier_log(const char *log_buf, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log_buf);
+}
+
+static void validate_case(struct test_loader *tester,
+ struct test_spec *spec,
+ struct bpf_object *obj,
+ struct bpf_program *prog,
+ int load_err)
+{
+ if (spec->expect_msg) {
+ char *match;
+
+ match = strstr(tester->log_buf, spec->expect_msg);
+ if (!ASSERT_OK_PTR(match, "expect_msg")) {
+ /* if we are in verbose mode, we've already emitted log */
+ if (env.verbosity == VERBOSE_NONE)
+ emit_verifier_log(tester->log_buf, true /*force*/);
+ fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg);
+ return;
+ }
+ }
+}
+
+/* this function is forced noinline and has short generic name to look better
+ * in test_progs output (in case of a failure)
+ */
+static noinline
+void run_subtest(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name);
+ struct bpf_object *obj = NULL, *tobj;
+ struct bpf_program *prog, *tprog;
+ const void *obj_bytes;
+ size_t obj_byte_cnt;
+ int err;
+
+ if (tester_init(tester) < 0)
+ return; /* failed to initialize tester */
+
+ obj_bytes = elf_bytes_factory(&obj_byte_cnt);
+ obj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open_mem"))
+ return;
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *prog_name = bpf_program__name(prog);
+ struct test_spec spec;
+
+ if (!test__start_subtest(prog_name))
+ continue;
+
+ /* if we can't derive test specification, go to the next test */
+ err = parse_test_spec(tester, obj, prog, &spec);
+ if (!ASSERT_OK(err, "parse_test_spec"))
+ continue;
+
+ tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
+ if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */
+ continue;
+
+ bpf_object__for_each_program(tprog, tobj)
+ bpf_program__set_autoload(tprog, false);
+
+ bpf_object__for_each_program(tprog, tobj) {
+ /* only load specified program */
+ if (strcmp(bpf_program__name(tprog), prog_name) == 0) {
+ bpf_program__set_autoload(tprog, true);
+ break;
+ }
+ }
+
+ prepare_case(tester, &spec, tobj, tprog);
+
+ err = bpf_object__load(tobj);
+ if (spec.expect_failure) {
+ if (!ASSERT_ERR(err, "unexpected_load_success")) {
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ goto tobj_cleanup;
+ }
+ } else {
+ if (!ASSERT_OK(err, "unexpected_load_failure")) {
+ emit_verifier_log(tester->log_buf, true /*force*/);
+ goto tobj_cleanup;
+ }
+ }
+
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ validate_case(tester, &spec, tobj, tprog, err);
+
+tobj_cleanup:
+ bpf_object__close(tobj);
+ }
+
+ bpf_object__close(obj);
+}
+
+void test_loader__run_subtests(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory)
+{
+ /* see comment in run_subtest() for why we do this function nesting */
+ run_subtest(tester, skel_name, elf_bytes_factory);
+}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 7fc15e0d24a9..7cb1bc05e5cf 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -769,12 +769,14 @@ skip(ret != 0, "bpftool not installed")
base_progs = progs
_, base_maps = bpftool("map")
base_map_names = [
- 'pid_iter.rodata' # created on each bpftool invocation
+ 'pid_iter.rodata', # created on each bpftool invocation
+ 'libbpf_det_bind', # created on each bpftool invocation
]
# Check netdevsim
-ret, out = cmd("modprobe netdevsim", fail=False)
-skip(ret != 0, "netdevsim module could not be loaded")
+if not os.path.isdir("/sys/bus/netdevsim/"):
+ ret, out = cmd("modprobe netdevsim", fail=False)
+ skip(ret != 0, "netdevsim module could not be loaded")
# Check debugfs
_, out = cmd("mount")
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index b090996daee5..3f058dfadbaf 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TEST_PROGS_H
+#define __TEST_PROGS_H
+
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
@@ -210,6 +213,12 @@ int test__join_cgroup(const char *path);
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
+#define ASSERT_FAIL(fmt, args...) ({ \
+ static int duration = 0; \
+ CHECK(false, "", fmt"\n", ##args); \
+ false; \
+})
+
#define ASSERT_TRUE(actual, name) ({ \
static int duration = 0; \
bool ___ok = (actual); \
@@ -397,3 +406,27 @@ int write_sysctl(const char *sysctl, const char *value);
#endif
#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod"
+
+struct test_loader {
+ char *log_buf;
+ size_t log_buf_sz;
+
+ struct bpf_object *obj;
+};
+
+typedef const void *(*skel_elf_bytes_fn)(size_t *sz);
+
+extern void test_loader__run_subtests(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory);
+
+extern void test_loader_fini(struct test_loader *tester);
+
+#define RUN_TESTS(skel) ({ \
+ struct test_loader tester = {}; \
+ \
+ test_loader__run_subtests(&tester, #skel, skel##__elf_bytes); \
+ test_loader_fini(&tester); \
+})
+
+#endif /* __TEST_PROGS_H */
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index e768181a1bd7..024a0faafb3b 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1690,24 +1690,42 @@ static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
{
txmsg_pass = 1;
txmsg_redir = 0;
+ txmsg_ingress = 0;
txmsg_apply = 1;
txmsg_cork = 0;
test_send_one(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
+ txmsg_ingress = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ test_send_one(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_ingress = 1;
txmsg_apply = 1;
txmsg_cork = 0;
test_send_one(opt, cgrp);
txmsg_pass = 1;
txmsg_redir = 0;
+ txmsg_ingress = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ test_send_large(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_ingress = 0;
txmsg_apply = 1024;
txmsg_cork = 0;
test_send_large(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
+ txmsg_ingress = 1;
txmsg_apply = 1024;
txmsg_cork = 0;
test_send_large(opt, cgrp);
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 3193915c5ee6..9d993926bf0e 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -76,7 +76,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "arg#0 expected pointer to ctx, but got PTR",
+ .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_pass_ctx", 2 },
},
@@ -2305,3 +2305,85 @@
.errstr = "!read_ok",
.result = REJECT,
},
+/* Make sure that verifier.c:states_equal() considers IDs from all
+ * frames when building 'idmap' for check_ids().
+ */
+{
+ "calls: check_ids() across call boundary",
+ .insns = {
+ /* Function main() */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* fp[-24] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -24),
+ /* fp[-32] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -32),
+ /* call foo(&fp[-24], &fp[-32]) ; both arguments have IDs in the current
+ * ; stack frame
+ */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -24),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+ BPF_CALL_REL(2),
+ /* exit 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* Function foo()
+ *
+ * r9 = &frame[0].fp[-24] ; save arguments in the callee saved registers,
+ * r8 = &frame[0].fp[-32] ; arguments are pointers to pointers to map value
+ */
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_2),
+ /* r7 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r6 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from
+ * ; this check, thus produced verifier states differ
+ * ; only in 'insn_idx'
+ * r9 = r8
+ */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
+ /* r9 = *r9 ; verifier get's to this point via two paths:
+ * ; (I) one including r9 = r8, verified first;
+ * ; (II) one excluding r9 = r8, verified next.
+ * ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
+ * ; Suppose that checkpoint is created here via path (I).
+ * ; When verifying via (II) the r9.id must be compared against
+ * ; frame[0].fp[-24].id, otherwise (I) and (II) would be
+ * ; incorrectly deemed equivalent.
+ * if r9 == 0 goto <exit>
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
+ /* r8 = *r8 ; read map value via r8, this is not safe
+ * r0 = *r8 ; because r8 might be not equal to r9.
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
+ /* exit 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .fixup_map_hash_8b = { 3, 9 },
+ .result = REJECT,
+ .errstr = "R8 invalid mem access 'map_value_or_null'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index 11acd1855acf..dce2e28aeb43 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -654,3 +654,57 @@
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
+{
+ "direct packet access: test30 (check_id() in regsafe(), bad access)",
+ .insns = {
+ /* r9 = ctx */
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ /* r7 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r6 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* r2 = ctx->data
+ * r3 = ctx->data
+ * r4 = ctx->data_end
+ */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_9, offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_9, offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_9, offsetof(struct __sk_buff, data_end)),
+ /* if r6 > 100 goto exit
+ * if r7 > 100 goto exit
+ */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 100, 9),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 100, 8),
+ /* r2 += r6 ; this forces assignment of ID to r2
+ * r2 += 1 ; get some fixed off for r2
+ * r3 += r7 ; this forces assignment of ID to r3
+ * r3 += 1 ; get some fixed off for r3
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1),
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from
+ * ; this check, thus produced verifier states differ
+ * ; only in 'insn_idx'
+ * r2 = r3 ; optionally share ID between r2 and r3
+ */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+ /* if r3 > ctx->data_end goto exit */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 1),
+ /* r5 = *(u8 *) (r2 - 1) ; access packet memory using r2,
+ * ; this is not always safe
+ */
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, -1),
+ /* exit(0) */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .result = REJECT,
+ .errstr = "invalid access to packet, off=0 size=1, R2",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index 1f82021429bf..17ee84dc7766 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -9,7 +9,7 @@
},
.fixup_map_array_48b = { 1 },
.result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
.result = REJECT,
.errstr = "R1 is bpf_array invalid negative access: off=-8",
},
@@ -26,7 +26,7 @@
},
.fixup_map_array_48b = { 3 },
.result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
.result = REJECT,
.errstr = "only read from bpf_array is supported",
},
@@ -41,7 +41,7 @@
},
.fixup_map_array_48b = { 1 },
.result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
.result = REJECT,
.errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -57,7 +57,7 @@
},
.fixup_map_array_48b = { 1 },
.result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
.result = ACCEPT,
.retval = 1,
},
diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c
index 84838feba47f..92e3f6a61a79 100644
--- a/tools/testing/selftests/bpf/verifier/ringbuf.c
+++ b/tools/testing/selftests/bpf/verifier/ringbuf.c
@@ -28,7 +28,7 @@
},
.fixup_map_ringbuf = { 1 },
.result = REJECT,
- .errstr = "dereference of modified ringbuf_mem ptr R1",
+ .errstr = "R1 must have zero offset when passed to release func",
},
{
"ringbuf: invalid reservation offset 2",
diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c
index 781621facae4..eaf114f07e2e 100644
--- a/tools/testing/selftests/bpf/verifier/spin_lock.c
+++ b/tools/testing/selftests/bpf/verifier/spin_lock.c
@@ -331,3 +331,117 @@
.errstr = "inside bpf_spin_lock",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
+{
+ "spin_lock: regsafe compare reg->id for map value",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_6, offsetof(struct __sk_buff, mark)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_spin_lock = { 2 },
+ .result = REJECT,
+ .errstr = "bpf_spin_unlock of different lock",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = BPF_F_TEST_STATE_FREQ,
+},
+/* Make sure that regsafe() compares ids for spin lock records using
+ * check_ids():
+ * 1: r9 = map_lookup_elem(...) ; r9.id == 1
+ * 2: r8 = map_lookup_elem(...) ; r8.id == 2
+ * 3: r7 = ktime_get_ns()
+ * 4: r6 = ktime_get_ns()
+ * 5: if r6 > r7 goto <9>
+ * 6: spin_lock(r8)
+ * 7: r9 = r8
+ * 8: goto <10>
+ * 9: spin_lock(r9)
+ * 10: spin_unlock(r9) ; r9.id == 1 || r9.id == 2 and lock is active,
+ * ; second visit to (10) should be considered safe
+ * ; if check_ids() is used.
+ * 11: exit(0)
+ */
+{
+ "spin_lock: regsafe() check_ids() similar id mappings",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ /* r9 = map_lookup_elem(...) */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 24),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+ /* r8 = map_lookup_elem(...) */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 18),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ /* r7 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r6 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* if r6 > r7 goto +5 ; no new information about the state is derived from
+ * ; this check, thus produced verifier states differ
+ * ; only in 'insn_idx'
+ * spin_lock(r8)
+ * r9 = r8
+ * goto unlock
+ */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
+ BPF_JMP_A(3),
+ /* spin_lock(r9) */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+ /* spin_unlock(r9) */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_EMIT_CALL(BPF_FUNC_spin_unlock),
+ /* exit(0) */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_spin_lock = { 3, 10 },
+ .result = VERBOSE_ACCEPT,
+ .errstr = "28: safe",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .flags = BPF_F_TEST_STATE_FREQ,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
index 3ecb70a3d939..52a8bca14f03 100644
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ b/tools/testing/selftests/bpf/verifier/value_or_null.c
@@ -169,3 +169,52 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
+{
+ "MAP_VALUE_OR_NULL check_ids() in regsafe()",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* r9 = map_lookup_elem(...) */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+ /* r8 = map_lookup_elem(...) */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ /* r7 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r6 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from
+ * ; this check, thus produced verifier states differ
+ * ; only in 'insn_idx'
+ * r9 = r8 ; optionally share ID between r9 and r8
+ */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
+ /* if r9 == 0 goto <exit> */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
+ /* read map value via r8, this is not always
+ * safe because r8 might be not equal to r9.
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
+ /* exit 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .fixup_map_hash_8b = { 3, 9 },
+ .result = REJECT,
+ .errstr = "R8 invalid mem access 'map_value_or_null'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},