summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-09-06 16:49:17 +0200
committerDavid S. Miller <davem@davemloft.net>2019-09-06 16:49:17 +0200
commit1e46c09ec10049a9e366153b32e41cc557383fdb (patch)
treeb2f86d40fd1ddf32522b5dd1792158daeab72e21 /net/core
parentlan743x: remove redundant assignment to variable rx_process_result (diff)
parentMerge branch 'bpf-af-xdp-barrier-fixes' (diff)
downloadlinux-1e46c09ec10049a9e366153b32e41cc557383fdb.tar.xz
linux-1e46c09ec10049a9e366153b32e41cc557383fdb.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Add the ability to use unaligned chunks in the AF_XDP umem. By relaxing where the chunks can be placed, it allows to use an arbitrary buffer size and place whenever there is a free address in the umem. Helps more seamless DPDK AF_XDP driver integration. Support for i40e, ixgbe and mlx5e, from Kevin and Maxim. 2) Addition of a wakeup flag for AF_XDP tx and fill rings so the application can wake up the kernel for rx/tx processing which avoids busy-spinning of the latter, useful when app and driver is located on the same core. Support for i40e, ixgbe and mlx5e, from Magnus and Maxim. 3) bpftool fixes for printf()-like functions so compiler can actually enforce checks, bpftool build system improvements for custom output directories, and addition of 'bpftool map freeze' command, from Quentin. 4) Support attaching/detaching XDP programs from 'bpftool net' command, from Daniel. 5) Automatic xskmap cleanup when AF_XDP socket is released, and several barrier/{read,write}_once fixes in AF_XDP code, from Björn. 6) Relicense of bpf_helpers.h/bpf_endian.h for future libbpf inclusion as well as libbpf versioning improvements, from Andrii. 7) Several new BPF kselftests for verifier precision tracking, from Alexei. 8) Several BPF kselftest fixes wrt endianess to run on s390x, from Ilya. 9) And more BPF kselftest improvements all over the place, from Stanislav. 10) Add simple BPF map op cache for nfp driver to batch dumps, from Jakub. 11) AF_XDP socket umem mapping improvements for 32bit archs, from Ivan. 12) Add BPF-to-BPF call and BTF line info support for s390x JIT, from Yauheni. 13) Small optimization in arm64 JIT to spare 1 insns for BPF_MOD, from Jerin. 14) Fix an error check in bpf_tcp_gen_syncookie() helper, from Petar. 15) Various minor fixes and cleanups, from Nathan, Masahiro, Masanari, Peter, Wei, Yue. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/bpf_sk_storage.c104
-rw-r--r--net/core/dev.c15
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/sock.c9
4 files changed, 121 insertions, 9 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94c7f77ecb6b..da5639a5bd3b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,9 @@
static atomic_t cache_idx;
+#define SK_STORAGE_CREATE_FLAG_MASK \
+ (BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
struct bucket {
struct hlist_head list;
raw_spinlock_t lock;
@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
kfree_rcu(sk_storage, rcu);
}
-/* sk_storage->lock must be held and sk_storage->list cannot be empty */
static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
struct bpf_sk_storage_elem *selem)
{
@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}
-/* Called by __sk_destruct() */
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_sk_storage_elem *selem;
@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map;
+ /* Note that this map might be concurrently cloned from
+ * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+ * RCU read section to finish before proceeding. New RCU
+ * read sections should be prevented via bpf_map_inc_not_zero.
+ */
synchronize_rcu();
/* bpf prog and the userspace can no longer access this map
@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
{
- if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
+ if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+ !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+ attr->max_entries ||
attr->key_size != sizeof(int) || !attr->value_size ||
/* Enforce BTF for userspace sk dumping */
!attr->btf_key_type_id || !attr->btf_value_type_id)
@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+ struct bpf_sk_storage_map *smap,
+ struct bpf_sk_storage_elem *selem)
+{
+ struct bpf_sk_storage_elem *copy_selem;
+
+ copy_selem = selem_alloc(smap, newsk, NULL, true);
+ if (!copy_selem)
+ return NULL;
+
+ if (map_value_has_spin_lock(&smap->map))
+ copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+ SDATA(selem)->data, true);
+ else
+ copy_map_value(&smap->map, SDATA(copy_selem)->data,
+ SDATA(selem)->data);
+
+ return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+ struct bpf_sk_storage *new_sk_storage = NULL;
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_elem *selem;
+ int ret = 0;
+
+ RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+ rcu_read_lock();
+ sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+ if (!sk_storage || hlist_empty(&sk_storage->list))
+ goto out;
+
+ hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+ struct bpf_sk_storage_elem *copy_selem;
+ struct bpf_sk_storage_map *smap;
+ struct bpf_map *map;
+
+ smap = rcu_dereference(SDATA(selem)->smap);
+ if (!(smap->map.map_flags & BPF_F_CLONE))
+ continue;
+
+ /* Note that for lockless listeners adding new element
+ * here can race with cleanup in bpf_sk_storage_map_free.
+ * Try to grab map refcnt to make sure that it's still
+ * alive and prevent concurrent removal.
+ */
+ map = bpf_map_inc_not_zero(&smap->map, false);
+ if (IS_ERR(map))
+ continue;
+
+ copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+ if (!copy_selem) {
+ ret = -ENOMEM;
+ bpf_map_put(map);
+ goto out;
+ }
+
+ if (new_sk_storage) {
+ selem_link_map(smap, copy_selem);
+ __selem_link_sk(new_sk_storage, copy_selem);
+ } else {
+ ret = sk_storage_alloc(newsk, smap, copy_selem);
+ if (ret) {
+ kfree(copy_selem);
+ atomic_sub(smap->elem_size,
+ &newsk->sk_omem_alloc);
+ bpf_map_put(map);
+ goto out;
+ }
+
+ new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+ }
+ bpf_map_put(map);
+ }
+
+out:
+ rcu_read_unlock();
+
+ /* In case of an error, don't free anything explicitly here, the
+ * caller is responsible to call bpf_sk_storage_free.
+ */
+
+ return ret;
+}
+
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index 49589ed2018d..b1afafee3e2a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8126,12 +8126,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_chk = generic_xdp_install;
if (fd >= 0) {
+ u32 prog_id;
+
if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
return -EEXIST;
}
- if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_query(dev, bpf_op, query)) {
+
+ prog_id = __dev_xdp_query(dev, bpf_op, query);
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
NL_SET_ERR_MSG(extack, "XDP program already attached");
return -EBUSY;
}
@@ -8146,6 +8149,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_prog_put(prog);
return -EINVAL;
}
+
+ if (prog->aux->id == prog_id) {
+ bpf_prog_put(prog);
+ return 0;
+ }
+ } else {
+ if (!__dev_xdp_query(dev, bpf_op, query))
+ return 0;
}
err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
diff --git a/net/core/filter.c b/net/core/filter.c
index b91988f8b94e..ed6563622ce3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5903,7 +5903,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
default:
return -EPROTONOSUPPORT;
}
- if (mss <= 0)
+ if (mss == 0)
return -ENOENT;
return cookie | ((u64)mss << 32);
diff --git a/net/core/sock.c b/net/core/sock.c
index 545fac19a711..07863edbe6fc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-#ifdef CONFIG_BPF_SYSCALL
- RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
-#endif
+
+ if (bpf_sk_storage_clone(sk, newsk)) {
+ sk_free_unlock_clone(newsk);
+ newsk = NULL;
+ goto out;
+ }
newsk->sk_err = 0;
newsk->sk_err_soft = 0;