summaryrefslogtreecommitdiffstats
path: root/kernel/bpf/sockmap.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2018-10-01 16:58:57 +0200
committerJens Axboe <axboe@kernel.dk>2018-10-01 16:58:57 +0200
commitc0aac682fa6590cb660cb083dbc09f55e799d2d2 (patch)
tree28ded5b660c42661c29e1d4abdeb1a5e92a258ed /kernel/bpf/sockmap.c
parentblk-iolatency: keep track of previous windows stats (diff)
parentLinux 4.19-rc6 (diff)
downloadlinux-c0aac682fa6590cb660cb083dbc09f55e799d2d2.tar.xz
linux-c0aac682fa6590cb660cb083dbc09f55e799d2d2.zip
Merge tag 'v4.19-rc6' into for-4.20/block
Merge -rc6 in, for two reasons: 1) Resolve a trivial conflict in the blk-mq-tag.c documentation 2) A few important regression fixes went into upstream directly, so they aren't in the 4.20 branch. Signed-off-by: Jens Axboe <axboe@kernel.dk> * tag 'v4.19-rc6': (780 commits) Linux 4.19-rc6 MAINTAINERS: fix reference to moved drivers/{misc => auxdisplay}/panel.c cpufreq: qcom-kryo: Fix section annotations perf/core: Add sanity check to deal with pinned event failure xen/blkfront: correct purging of persistent grants Revert "xen/blkfront: When purging persistent grants, keep them in the buffer" selftests/powerpc: Fix Makefiles for headers_install change blk-mq: I/O and timer unplugs are inverted in blktrace dax: Fix deadlock in dax_lock_mapping_entry() x86/boot: Fix kexec booting failure in the SEV bit detection code bcache: add separate workqueue for journal_write to avoid deadlock drm/amd/display: Fix Edid emulation for linux drm/amd/display: Fix Vega10 lightup on S3 resume drm/amdgpu: Fix vce work queue was not cancelled when suspend Revert "drm/panel: Add device_link from panel device to DRM device" xen/blkfront: When purging persistent grants, keep them in the buffer clocksource/drivers/timer-atmel-pit: Properly handle error cases block: fix deadline elevator drain for zoned block devices ACPI / hotplug / PCI: Don't scan for non-hotplug bridges if slot is not bridge drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set ... Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'kernel/bpf/sockmap.c')
-rw-r--r--kernel/bpf/sockmap.c91
1 files changed, 71 insertions, 20 deletions
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 488ef9663c01..0a0f2ec75370 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -132,6 +132,7 @@ struct smap_psock {
struct work_struct gc_work;
struct proto *sk_proto;
+ void (*save_unhash)(struct sock *sk);
void (*save_close)(struct sock *sk, long timeout);
void (*save_data_ready)(struct sock *sk);
void (*save_write_space)(struct sock *sk);
@@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
+static void bpf_tcp_unhash(struct sock *sk);
static void bpf_tcp_close(struct sock *sk, long timeout);
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
@@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
struct proto *base)
{
prot[SOCKMAP_BASE] = *base;
+ prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash;
prot[SOCKMAP_BASE].close = bpf_tcp_close;
prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
@@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk)
return -EBUSY;
}
+ psock->save_unhash = sk->sk_prot->unhash;
psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
@@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
return e;
}
-static void bpf_tcp_close(struct sock *sk, long timeout)
+static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
{
- void (*close_fun)(struct sock *sk, long timeout);
struct smap_psock_map_entry *e;
struct sk_msg_buff *md, *mtmp;
- struct smap_psock *psock;
struct sock *osk;
- lock_sock(sk);
- rcu_read_lock();
- psock = smap_psock_sk(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
- }
-
- /* The psock may be destroyed anytime after exiting the RCU critial
- * section so by the time we use close_fun the psock may no longer
- * be valid. However, bpf_tcp_close is called with the sock lock
- * held so the close hook and sk are still valid.
- */
- close_fun = psock->save_close;
-
if (psock->cork) {
free_start_sg(psock->sock, psock->cork, true);
kfree(psock->cork);
@@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
kfree(e);
e = psock_map_pop(sk, psock);
}
+}
+
+static void bpf_tcp_unhash(struct sock *sk)
+{
+ void (*unhash_fun)(struct sock *sk);
+ struct smap_psock *psock;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ if (sk->sk_prot->unhash)
+ sk->sk_prot->unhash(sk);
+ return;
+ }
+ unhash_fun = psock->save_unhash;
+ bpf_tcp_remove(sk, psock);
+ rcu_read_unlock();
+ unhash_fun(sk);
+}
+
+static void bpf_tcp_close(struct sock *sk, long timeout)
+{
+ void (*close_fun)(struct sock *sk, long timeout);
+ struct smap_psock *psock;
+
+ lock_sock(sk);
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ release_sock(sk);
+ return sk->sk_prot->close(sk, timeout);
+ }
+ close_fun = psock->save_close;
+ bpf_tcp_remove(sk, psock);
rcu_read_unlock();
release_sock(sk);
close_fun(sk, timeout);
@@ -2097,8 +2119,12 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL;
}
+ /* ULPs are currently supported only for TCP sockets in ESTABLISHED
+ * state.
+ */
if (skops.sk->sk_type != SOCK_STREAM ||
- skops.sk->sk_protocol != IPPROTO_TCP) {
+ skops.sk->sk_protocol != IPPROTO_TCP ||
+ skops.sk->sk_state != TCP_ESTABLISHED) {
fput(socket->file);
return -EOPNOTSUPP;
}
@@ -2453,6 +2479,16 @@ static int sock_hash_update_elem(struct bpf_map *map,
return -EINVAL;
}
+ /* ULPs are currently supported only for TCP sockets in ESTABLISHED
+ * state.
+ */
+ if (skops.sk->sk_type != SOCK_STREAM ||
+ skops.sk->sk_protocol != IPPROTO_TCP ||
+ skops.sk->sk_state != TCP_ESTABLISHED) {
+ fput(socket->file);
+ return -EOPNOTSUPP;
+ }
+
lock_sock(skops.sk);
preempt_disable();
rcu_read_lock();
@@ -2543,10 +2579,22 @@ const struct bpf_map_ops sock_hash_ops = {
.map_check_btf = map_check_no_btf,
};
+static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops)
+{
+ return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
+ ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
+}
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
struct bpf_map *, map, void *, key, u64, flags)
{
WARN_ON_ONCE(!rcu_read_lock_held());
+
+ /* ULPs are currently supported only for TCP sockets in ESTABLISHED
+ * state. This checks that the sock ops triggering the update is
+ * one indicating we are (or will be soon) in an ESTABLISHED state.
+ */
+ if (!bpf_is_valid_sock_op(bpf_sock))
+ return -EOPNOTSUPP;
return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
}
@@ -2565,6 +2613,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
struct bpf_map *, map, void *, key, u64, flags)
{
WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (!bpf_is_valid_sock_op(bpf_sock))
+ return -EOPNOTSUPP;
return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
}