summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2020-10-13 01:16:50 +0200
committerJakub Kicinski <kuba@kernel.org>2020-10-13 01:16:50 +0200
commitccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae (patch)
tree3028901b29bf4ab04cd50d61da4fecdb20b182b6 /net
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next (diff)
parentMerge branch 'bpf, sockmap: allow verdict only sk_skb progs' (diff)
downloadlinux-ccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae.tar.xz
linux-ccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2020-10-12 The main changes are: 1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong. 2) libbpf relocation support for different size load/store, from Andrii. 3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel. 4) BPF support for per-cpu variables, form Hao. 5) sockmap improvements, from John. ==================== Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c15
-rw-r--r--net/core/filter.c107
-rw-r--r--net/core/skmsg.c161
-rw-r--r--net/core/sock_map.c37
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/xdp/xsk_buff_pool.c3
-rw-r--r--net/xdp/xsk_queue.h4
-rw-r--r--net/xdp/xskmap.c2
8 files changed, 262 insertions, 68 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index a146bac84320..22babf5bcc88 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4930,7 +4930,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
static inline struct sk_buff *
sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev)
+ struct net_device *orig_dev, bool *another)
{
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
@@ -4974,7 +4974,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* redirecting to another netdev
*/
__skb_push(skb, skb->mac_len);
- skb_do_redirect(skb);
+ if (skb_do_redirect(skb) == -EAGAIN) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
return NULL;
case TC_ACT_CONSUMED:
return NULL;
@@ -5163,7 +5167,12 @@ another_round:
skip_taps:
#ifdef CONFIG_NET_INGRESS
if (static_branch_unlikely(&ingress_needed_key)) {
- skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+ bool another = false;
+
+ skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
+ &another);
+ if (another)
+ goto another_round;
if (!skb)
goto out;
diff --git a/net/core/filter.c b/net/core/filter.c
index bc6bd2b323e8..c5e2a1c5fd8d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -76,6 +76,7 @@
#include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
+#include <net/tls.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -2379,8 +2380,9 @@ out:
/* Internal, non-exposed redirect flags. */
enum {
- BPF_F_NEIGH = (1ULL << 1),
-#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH)
+ BPF_F_NEIGH = (1ULL << 1),
+ BPF_F_PEER = (1ULL << 2),
+#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER)
};
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2429,19 +2431,35 @@ EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
int skb_do_redirect(struct sk_buff *skb)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct net *net = dev_net(skb->dev);
struct net_device *dev;
u32 flags = ri->flags;
- dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
+ dev = dev_get_by_index_rcu(net, ri->tgt_index);
ri->tgt_index = 0;
- if (unlikely(!dev)) {
- kfree_skb(skb);
- return -EINVAL;
+ ri->flags = 0;
+ if (unlikely(!dev))
+ goto out_drop;
+ if (flags & BPF_F_PEER) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (unlikely(!ops->ndo_get_peer_dev ||
+ !skb_at_tc_ingress(skb)))
+ goto out_drop;
+ dev = ops->ndo_get_peer_dev(dev);
+ if (unlikely(!dev ||
+ !is_skb_forwardable(dev, skb) ||
+ net_eq(net, dev_net(dev))))
+ goto out_drop;
+ skb->dev = dev;
+ return -EAGAIN;
}
-
return flags & BPF_F_NEIGH ?
__bpf_redirect_neigh(skb, dev) :
__bpf_redirect(skb, dev, flags);
+out_drop:
+ kfree_skb(skb);
+ return -EINVAL;
}
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
@@ -2465,6 +2483,27 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ if (unlikely(flags))
+ return TC_ACT_SHOT;
+
+ ri->flags = BPF_F_PEER;
+ ri->tgt_index = ifindex;
+
+ return TC_ACT_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_peer_proto = {
+ .func = bpf_redirect_peer,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -3479,6 +3518,48 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
SKB_MAX_ALLOC;
}
+BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+ u32, mode, u64, flags)
+{
+ u32 len_diff_abs = abs(len_diff);
+ bool shrink = len_diff < 0;
+ int ret = 0;
+
+ if (unlikely(flags || mode))
+ return -EINVAL;
+ if (unlikely(len_diff_abs > 0xfffU))
+ return -EFAULT;
+
+ if (!shrink) {
+ ret = skb_cow(skb, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+ __skb_push(skb, len_diff_abs);
+ memset(skb->data, 0, len_diff_abs);
+ } else {
+ if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
+ return -ENOMEM;
+ __skb_pull(skb, len_diff_abs);
+ }
+ bpf_compute_data_end_sk_skb(skb);
+ if (tls_sw_has_ctx_rx(skb->sk)) {
+ struct strp_msg *rxm = strp_msg(skb);
+
+ rxm->full_len += len_diff;
+ }
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_adjust_room_proto = {
+ .func = sk_skb_adjust_room,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32, mode, u64, flags)
{
@@ -4784,6 +4865,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
else
icsk->icsk_user_timeout = val;
break;
+ case TCP_NOTSENT_LOWAT:
+ tp->notsent_lowat = val;
+ sk->sk_write_space(sk);
+ break;
default:
ret = -EINVAL;
}
@@ -5149,7 +5234,6 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
- params->ifindex = dev->ifindex;
return 0;
}
@@ -5246,6 +5330,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
dev = nhc->nhc_dev;
params->rt_metric = res.fi->fib_priority;
+ params->ifindex = dev->ifindex;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
@@ -5371,6 +5456,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
dev = res.nh->fib_nh_dev;
params->rt_metric = res.f6i->fib6_metric;
+ params->ifindex = dev->ifindex;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here.
@@ -6745,6 +6831,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_change_tail ||
func == sk_skb_change_tail ||
func == bpf_skb_adjust_room ||
+ func == sk_skb_adjust_room ||
func == bpf_skb_pull_data ||
func == sk_skb_pull_data ||
func == bpf_clone_redirect ||
@@ -7005,6 +7092,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_redirect_proto;
case BPF_FUNC_redirect_neigh:
return &bpf_redirect_neigh_proto;
+ case BPF_FUNC_redirect_peer:
+ return &bpf_redirect_peer_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
case BPF_FUNC_get_hash_recalc:
@@ -7218,6 +7307,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &sk_skb_change_tail_proto;
case BPF_FUNC_skb_change_head:
return &sk_skb_change_head_proto;
+ case BPF_FUNC_skb_adjust_room:
+ return &sk_skb_adjust_room_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 4b5f7c8fecd1..654182ecf87b 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -433,10 +433,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- if (ingress)
- return sk_psock_skb_ingress(psock, skb);
- else
+ if (!ingress) {
+ if (!sock_writeable(psock->sk))
+ return -EAGAIN;
return skb_send_sock_locked(psock->sk, skb, off, len);
+ }
+ return sk_psock_skb_ingress(psock, skb);
}
static void sk_psock_backlog(struct work_struct *work)
@@ -625,6 +627,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
rcu_assign_sk_user_data(sk, NULL);
if (psock->progs.skb_parser)
sk_psock_stop_strp(sk, psock);
+ else if (psock->progs.skb_verdict)
+ sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
@@ -682,19 +686,8 @@ EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
struct sk_buff *skb)
{
- int ret;
-
- skb->sk = psock->sk;
bpf_compute_data_end_sk_skb(skb);
- ret = bpf_prog_run_pin_on_cpu(prog, skb);
- /* strparser clones the skb before handing it to a upper layer,
- * meaning skb_orphan has been called. We NULL sk on the way out
- * to ensure we don't trigger a BUG_ON() in skb/sk operations
- * later and because we are not charging the memory of this skb
- * to any socket yet.
- */
- skb->sk = NULL;
- return ret;
+ return bpf_prog_run_pin_on_cpu(prog, skb);
}
static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
@@ -709,38 +702,35 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
- bool ingress;
sk_other = tcp_skb_bpf_redirect_fetch(skb);
+ /* This error is a buggy BPF program, it returned a redirect
+ * return code, but then didn't set a redirect interface.
+ */
if (unlikely(!sk_other)) {
kfree_skb(skb);
return;
}
psock_other = sk_psock(sk_other);
+ /* This error indicates the socket is being torn down or had another
+ * error that caused the pipe to break. We can't send a packet on
+ * a socket that is in this state so we drop the skb.
+ */
if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
kfree_skb(skb);
return;
}
- ingress = tcp_skb_bpf_ingress(skb);
- if ((!ingress && sock_writeable(sk_other)) ||
- (ingress &&
- atomic_read(&sk_other->sk_rmem_alloc) <=
- sk_other->sk_rcvbuf)) {
- if (!ingress)
- skb_set_owner_w(skb, sk_other);
- skb_queue_tail(&psock_other->ingress_skb, skb);
- schedule_work(&psock_other->work);
- } else {
- kfree_skb(skb);
- }
+ skb_queue_tail(&psock_other->ingress_skb, skb);
+ schedule_work(&psock_other->work);
}
-static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)
+static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
{
switch (verdict) {
case __SK_REDIRECT:
+ skb_set_owner_r(skb, sk);
sk_psock_skb_redirect(skb);
break;
case __SK_PASS:
@@ -758,11 +748,17 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
rcu_read_lock();
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
+ /* We skip full set_owner_r here because if we do a SK_PASS
+ * or SK_DROP we can skip skb memory accounting and use the
+ * TLS context.
+ */
+ skb->sk = psock->sk;
tcp_skb_bpf_redirect_clear(skb);
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+ skb->sk = NULL;
}
- sk_psock_tls_verdict_apply(skb, ret);
+ sk_psock_tls_verdict_apply(skb, psock->sk, ret);
rcu_read_unlock();
return ret;
}
@@ -771,7 +767,9 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
static void sk_psock_verdict_apply(struct sk_psock *psock,
struct sk_buff *skb, int verdict)
{
+ struct tcp_skb_cb *tcp;
struct sock *sk_other;
+ int err = -EIO;
switch (verdict) {
case __SK_PASS:
@@ -780,16 +778,24 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
goto out_free;
}
- if (atomic_read(&sk_other->sk_rmem_alloc) <=
- sk_other->sk_rcvbuf) {
- struct tcp_skb_cb *tcp = TCP_SKB_CB(skb);
- tcp->bpf.flags |= BPF_F_INGRESS;
+ tcp = TCP_SKB_CB(skb);
+ tcp->bpf.flags |= BPF_F_INGRESS;
+
+ /* If the queue is empty then we can submit directly
+ * into the msg queue. If its not empty we have to
+ * queue work otherwise we may get OOO data. Otherwise,
+ * if sk_psock_skb_ingress errors will be handled by
+ * retrying later from workqueue.
+ */
+ if (skb_queue_empty(&psock->ingress_skb)) {
+ err = sk_psock_skb_ingress(psock, skb);
+ }
+ if (err < 0) {
skb_queue_tail(&psock->ingress_skb, skb);
schedule_work(&psock->work);
- break;
}
- goto out_free;
+ break;
case __SK_REDIRECT:
sk_psock_skb_redirect(skb);
break;
@@ -814,9 +820,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
kfree_skb(skb);
goto out;
}
+ skb_set_owner_r(skb, sk);
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
- skb_orphan(skb);
tcp_skb_bpf_redirect_clear(skb);
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
@@ -839,8 +845,11 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
rcu_read_lock();
prog = READ_ONCE(psock->progs.skb_parser);
- if (likely(prog))
+ if (likely(prog)) {
+ skb->sk = psock->sk;
ret = sk_psock_bpf_run(psock, prog, skb);
+ skb->sk = NULL;
+ }
rcu_read_unlock();
return ret;
}
@@ -864,6 +873,57 @@ static void sk_psock_strp_data_ready(struct sock *sk)
rcu_read_unlock();
}
+static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
+ unsigned int offset, size_t orig_len)
+{
+ struct sock *sk = (struct sock *)desc->arg.data;
+ struct sk_psock *psock;
+ struct bpf_prog *prog;
+ int ret = __SK_DROP;
+ int len = skb->len;
+
+ /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (!skb) {
+ desc->error = -ENOMEM;
+ return 0;
+ }
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (unlikely(!psock)) {
+ len = 0;
+ kfree_skb(skb);
+ goto out;
+ }
+ skb_set_owner_r(skb, sk);
+ prog = READ_ONCE(psock->progs.skb_verdict);
+ if (likely(prog)) {
+ tcp_skb_bpf_redirect_clear(skb);
+ ret = sk_psock_bpf_run(psock, prog, skb);
+ ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+ }
+ sk_psock_verdict_apply(psock, skb, ret);
+out:
+ rcu_read_unlock();
+ return len;
+}
+
+static void sk_psock_verdict_data_ready(struct sock *sk)
+{
+ struct socket *sock = sk->sk_socket;
+ read_descriptor_t desc;
+
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ return;
+
+ desc.arg.data = sk;
+ desc.error = 0;
+ desc.count = 1;
+
+ sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+}
+
static void sk_psock_write_space(struct sock *sk)
{
struct sk_psock *psock;
@@ -893,6 +953,19 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
return strp_init(&psock->parser.strp, sk, &cb);
}
+void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
+{
+ struct sk_psock_parser *parser = &psock->parser;
+
+ if (parser->enabled)
+ return;
+
+ parser->saved_data_ready = sk->sk_data_ready;
+ sk->sk_data_ready = sk_psock_verdict_data_ready;
+ sk->sk_write_space = sk_psock_write_space;
+ parser->enabled = true;
+}
+
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_parser *parser = &psock->parser;
@@ -918,3 +991,15 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
strp_stop(&parser->strp);
parser->enabled = false;
}
+
+void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
+{
+ struct sk_psock_parser *parser = &psock->parser;
+
+ if (!parser->enabled)
+ return;
+
+ sk->sk_data_ready = parser->saved_data_ready;
+ parser->saved_data_ready = NULL;
+ parser->enabled = false;
+}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e83a80e8f13b..df09c39a4dd2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -148,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock,
static void sock_map_del_link(struct sock *sk,
struct sk_psock *psock, void *link_raw)
{
+ bool strp_stop = false, verdict_stop = false;
struct sk_psock_link *link, *tmp;
- bool strp_stop = false;
spin_lock_bh(&psock->link_lock);
list_for_each_entry_safe(link, tmp, &psock->link, list) {
@@ -159,14 +159,19 @@ static void sock_map_del_link(struct sock *sk,
map);
if (psock->parser.enabled && stab->progs.skb_parser)
strp_stop = true;
+ if (psock->parser.enabled && stab->progs.skb_verdict)
+ verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
}
}
spin_unlock_bh(&psock->link_lock);
- if (strp_stop) {
+ if (strp_stop || verdict_stop) {
write_lock_bh(&sk->sk_callback_lock);
- sk_psock_stop_strp(sk, psock);
+ if (strp_stop)
+ sk_psock_stop_strp(sk, psock);
+ else
+ sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
}
}
@@ -230,16 +235,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
{
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
struct sk_psock *psock;
- bool skb_progs;
int ret;
skb_verdict = READ_ONCE(progs->skb_verdict);
skb_parser = READ_ONCE(progs->skb_parser);
- skb_progs = skb_parser && skb_verdict;
- if (skb_progs) {
+ if (skb_verdict) {
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
if (IS_ERR(skb_verdict))
return PTR_ERR(skb_verdict);
+ }
+ if (skb_parser) {
skb_parser = bpf_prog_inc_not_zero(skb_parser);
if (IS_ERR(skb_parser)) {
bpf_prog_put(skb_verdict);
@@ -264,7 +269,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
if (psock) {
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
- (skb_progs && READ_ONCE(psock->progs.skb_parser))) {
+ (skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
+ (skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
sk_psock_put(sk, psock);
ret = -EBUSY;
goto out_progs;
@@ -285,28 +291,31 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
goto out_drop;
write_lock_bh(&sk->sk_callback_lock);
- if (skb_progs && !psock->parser.enabled) {
+ if (skb_parser && skb_verdict && !psock->parser.enabled) {
ret = sk_psock_init_strp(sk, psock);
- if (ret) {
- write_unlock_bh(&sk->sk_callback_lock);
- goto out_drop;
- }
+ if (ret)
+ goto out_unlock_drop;
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
psock_set_prog(&psock->progs.skb_parser, skb_parser);
sk_psock_start_strp(sk, psock);
+ } else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
+ psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ sk_psock_start_verdict(sk,psock);
}
write_unlock_bh(&sk->sk_callback_lock);
return 0;
+out_unlock_drop:
+ write_unlock_bh(&sk->sk_callback_lock);
out_drop:
sk_psock_put(sk, psock);
out_progs:
if (msg_parser)
bpf_prog_put(msg_parser);
out:
- if (skb_progs) {
+ if (skb_verdict)
bpf_prog_put(skb_verdict);
+ if (skb_parser)
bpf_prog_put(skb_parser);
- }
return ret;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 56c306e3cd2f..495dda2449fe 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -548,7 +548,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fastopen_req = NULL;
RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
- bpf_skops_init_child(sk, newsk);
tcp_bpf_clone(sk, newsk);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index e63fadd000db..64c9e55d4d4e 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -3,9 +3,6 @@
#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
#include <net/xdp_sock_drv.h>
-#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
-#include <linux/swiotlb.h>
#include "xsk_queue.h"
#include "xdp_umem.h"
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index dc1dd5ef70d1..cdb9cf3cd136 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -15,6 +15,10 @@
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
+ /* Hinder the adjacent cache prefetcher to prefetch the consumer
+ * pointer if the producer pointer is touched and vice versa.
+ */
+ u32 pad ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
u32 flags;
};
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 0c5df593bc56..49da2b8ace8b 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -132,7 +132,7 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
-static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
struct bpf_insn *insn = insn_buf;