From 74682128c6e456b529b25b95ba807484ce58a753 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Sun, 5 Aug 2018 17:19:13 -0700
Subject: bpf: btf: Change tools/lib/bpf/btf to LGPL

This patch changes the tools/lib/bpf/btf.[ch] to LGPL, which is in
line with libbpf.

Signed-off-by: Martin KaFai Lau
Acked-by: Alexei Starovoitov
Signed-off-by: Daniel Borkmann
---
 tools/lib/bpf/btf.c | 2 +-
 tools/lib/bpf/btf.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2d270c560df3..c36a3a76986a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: LGPL-2.1
 /* Copyright (c) 2018 Facebook */
 
 #include
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index e2a09a155f84..caac3a404dc5 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: LGPL-2.1 */
 /* Copyright (c) 2018 Facebook */
 
 #ifndef __BPF_BTF_H
--
cgit v1.2.3


From 5121700b346b6160ccc9411194e3f1f417c340d1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 8 Aug 2018 19:23:13 +0200
Subject: bpf, sockmap: fix bpf_tcp_sendmsg sock error handling

While working on the bpf_tcp_sendmsg() code, I noticed that when
sk->sk_err is set we error out with err = sk->sk_err. However, this
is problematic since sk->sk_err holds a positive error value, and
therefore we will neither go into sk_stream_error() nor report an
error back to user space. I had this case with EPIPE: user space
thought sendmsg() had succeeded, since EPIPE is a positive value,
and assumed we had submitted 32 bytes. Fix it by negating the
sk->sk_err value.

Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
Signed-off-by: Daniel Borkmann
Acked-by: John Fastabend
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/sockmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 98fb7938beea..f7360c4d7250 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1053,7 +1053,7 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 		int copy;
 
 		if (sk->sk_err) {
-			err = sk->sk_err;
+			err = -sk->sk_err;
 			goto out_err;
 		}
--
cgit v1.2.3


From 7c81c71730456845e6212dccbf00098faa66740f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 8 Aug 2018 19:23:14 +0200
Subject: bpf, sockmap: fix leak in bpf_tcp_sendmsg wait for mem path

In bpf_tcp_sendmsg(), sk_alloc_sg() may fail. In the ENOMEM case it
may also mean that we have partially filled the scatterlist entries
with pages. Later, jumping to sk_stream_wait_memory(), we could
further fail with an error for several reasons, but we fail to call
free_start_sg() if the local sk_msg_buff was used.

Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
Signed-off-by: Daniel Borkmann
Acked-by: John Fastabend
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/sockmap.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index f7360c4d7250..c4d75c52b4fc 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1048,7 +1048,7 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 
 	while (msg_data_left(msg)) {
-		struct sk_msg_buff *m;
+		struct sk_msg_buff *m = NULL;
 		bool enospc = false;
 		int copy;
 
@@ -1116,8 +1116,11 @@ wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
 		err = sk_stream_wait_memory(sk, &timeo);
-		if (err)
+		if (err) {
+			if (m && m != psock->cork)
+				free_start_sg(sk, m);
 			goto out_err;
+		}
 	}
 out_err:
 	if (err < 0)
--
cgit v1.2.3
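
A note on the error-value convention behind the sendmsg fix above:
in-kernel functions signal failure by returning a negative errno,
while sk->sk_err stores the positive value. Below is a minimal
userspace sketch of the caller-side consequence; the function and
values are invented for illustration and are not kernel code.

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical send path: sock_err stands in for sk->sk_err,
     * which holds a POSITIVE errno value (EPIPE is 32 on Linux).
     */
    static int fake_sendmsg(int sock_err, int size)
    {
        if (sock_err)
            return -sock_err;   /* negate, as the fix does */
        return size;            /* success: bytes submitted */
    }

    int main(void)
    {
        int ret = fake_sendmsg(EPIPE, 100);

        /* Callers treat only negative values as errors. Returning
         * the positive EPIPE (32) would read as "32 bytes sent".
         */
        if (ret < 0)
            printf("error: %d\n", ret);
        else
            printf("sent %d bytes\n", ret);
        return 0;
    }
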
From 3c6ed988fd8b66eec8e5ab2b624818865d2712ca Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 8 Aug 2018 19:23:15 +0200
Subject: bpf, sockmap: fix cork timeout for select due to epipe

I ran into the same issue as a009f1f396d0 ("selftests/bpf:
test_sockmap, timing improvements"): a broken pipe error on the
socket, because the remote end timed out on select and then shut
down its sockets while the other side was still sending. We may need
a bigger rework of test_sockmap.c in general, but for now increase
the select timeout to a more suitable value.

Fixes: a18fda1a62c3 ("bpf: reduce runtime of test_sockmap tests")
Signed-off-by: Daniel Borkmann
Acked-by: John Fastabend
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/test_sockmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 9e78df207919..0c7d9e556b47 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -354,7 +354,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 	while (s->bytes_recvd < total_bytes) {
 		if (txmsg_cork) {
 			timeout.tv_sec = 0;
-			timeout.tv_usec = 1000;
+			timeout.tv_usec = 300000;
 		} else {
 			timeout.tv_sec = 1;
 			timeout.tv_usec = 0;
--
cgit v1.2.3


From ad0ab027fc6da08cbd34070d816ff3b7986c64ae Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer
Date: Wed, 8 Aug 2018 23:00:34 +0200
Subject: xdp: fix bug in cpumap teardown code path

When removing a cpumap entry, a number of synchronization steps
happen. Eventually the teardown code __cpu_map_entry_free() is
invoked via call_rcu.

The teardown code flushes remaining xdp_frames by invoking
bq_flush_to_queue(), which calls xdp_return_frame_rx_napi(). The
issue is that the teardown code is not running in the RX NAPI code
path, and is thus not allowed to invoke the NAPI variant of
xdp_return_frame.

This bug was found and triggered by using the --stress-mode option
of the samples/bpf program xdp_redirect_cpu. It is hard to trigger,
because the ptr_ring has to be full, the cpumap bulk queue contains
at most 8 packets, and a remote CPU is racing to empty the ptr_ring
queue.

Fixes: 389ab7f01af9 ("xdp: introduce xdp_return_frame_rx_napi")
Tested-by: Jean-Tsung Hsiao
Signed-off-by: Jesper Dangaard Brouer
Signed-off-by: Daniel Borkmann
---
 kernel/bpf/cpumap.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..46f5f29605d4 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq);
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx);
 
 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
 		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
 
 		/* No concurrent bq_enqueue can run at this point */
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, false);
 	}
 	free_percpu(rcpu->bulkq);
 	/* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq)
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
 	unsigned int processed = 0, drops = 0;
 	const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame_rx_napi(xdpf);
+			if (likely(in_napi_ctx))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				xdp_return_frame(xdpf);
 		}
 		processed++;
 	}
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);
 
 	/* Notice, xdp_buff/page MUST be queued here, long enough for
 	 * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)
 
 		/* Flush all frames in bulkq to real queue */
 		bq = this_cpu_ptr(rcpu->bulkq);
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);
 
 		/* If already running, costs spin_lock_irqsave + smb_mb */
 		wake_up_process(rcpu->kthread);
--
cgit v1.2.3
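
The shape of the cpumap fix above is a common one: a helper reachable
from two execution contexts gets an explicit flag from its callers,
because it cannot detect the context itself. A simplified userspace
sketch, with frame_free_fast()/frame_free() as invented stand-ins
for xdp_return_frame_rx_napi()/xdp_return_frame():

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for the two free variants; the "fast" one models
     * the call that is only safe inside the NAPI poll loop.
     */
    static void frame_free_fast(void *f) { printf("napi free %p\n", f); }
    static void frame_free(void *f)      { printf("slow free %p\n", f); }

    /* Flush helper mirroring bq_flush_to_queue(): the caller must
     * say which context it runs in.
     */
    static void bq_flush(void **q, int n, bool in_napi_ctx)
    {
        for (int i = 0; i < n; i++) {
            if (in_napi_ctx)
                frame_free_fast(q[i]);
            else
                frame_free(q[i]);
        }
    }

    int main(void)
    {
        void *q[2] = { (void *)0x1, (void *)0x2 };

        bq_flush(q, 2, true);   /* RX/NAPI path */
        bq_flush(q, 2, false);  /* teardown via call_rcu: not NAPI */
        return 0;
    }
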
From 37d7ff25957e05860e068a986209fee128db574a Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer
Date: Wed, 8 Aug 2018 23:00:39 +0200
Subject: samples/bpf: xdp_redirect_cpu adjustment to reproduce teardown race easier

The teardown race in cpumap is really hard to reproduce. These
changes make it easier to reproduce, for QA.

The --stress-mode now has a case with a very small queue size of 8,
which helps the teardown flush encounter a full queue and thus call
the xdp_return_frame API in a non-NAPI-protected context.

Also increase MAX_CPUS, as my QA department has larger machines than
I do.

Tested-by: Jean-Tsung Hsiao
Signed-off-by: Jesper Dangaard Brouer
Signed-off-by: Daniel Borkmann
---
 samples/bpf/xdp_redirect_cpu_kern.c | 2 +-
 samples/bpf/xdp_redirect_cpu_user.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 303e9e7161f3..4938dcbaecbf 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -14,7 +14,7 @@
 #include
 #include "bpf_helpers.h"
 
-#define MAX_CPUS 12 /* WARNING - sync with _user.c */
+#define MAX_CPUS 64 /* WARNING - sync with _user.c */
 
 /* Special map type that can XDP_REDIRECT frames to another CPU */
 struct bpf_map_def SEC("maps") cpu_map = {
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index f6efaefd485b..4b4d78fffe30 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -19,7 +19,7 @@ static const char *__doc__ =
 #include
 #include
 
-#define MAX_CPUS 12 /* WARNING - sync with _kern.c */
+#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
 
 /* How many xdp_progs are defined in _kern.c */
 #define MAX_PROG 5
@@ -527,7 +527,7 @@ static void stress_cpumap(void)
 	 * procedure.
 	 */
 	create_cpu_entry(1, 1024, 0, false);
-	create_cpu_entry(1, 128, 0, false);
+	create_cpu_entry(1, 8, 0, false);
 	create_cpu_entry(1, 16000, 0, false);
 }
--
cgit v1.2.3
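
Why a queue size of 8 helps: with a ring that small, two back-to-back
bulk flushes with no consumer are enough to hit the ring-full
condition, forcing the drop path that calls xdp_return_frame(). A toy
model of the mechanism, with sizes and names that are illustrative
only:

    #include <stdio.h>

    #define RING_SIZE 8   /* the stress-mode queue size */
    #define BULK 8        /* cpumap flushes up to 8 frames at a time */

    static void *ring[RING_SIZE];
    static int ring_count;

    /* Minimal stand-in for __ptr_ring_produce(): fails when full. */
    static int ring_produce(void *frame)
    {
        if (ring_count == RING_SIZE)
            return -1;
        ring[ring_count++] = frame;
        return 0;
    }

    int main(void)
    {
        int drops = 0;

        /* Two bulk flushes, no consumer: the second one is
         * guaranteed to find the ring full and take the drop path,
         * which is exactly where the kernel calls xdp_return_frame().
         */
        for (int i = 0; i < 2 * BULK; i++)
            if (ring_produce((void *)(long)(i + 1)))
                drops++;

        printf("queued %d, dropped %d\n", ring_count, drops);
        return 0;
    }
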
From 1bf9116d0866a649104a5dfa008c302ad54d1e02 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer
Date: Wed, 8 Aug 2018 23:00:45 +0200
Subject: xdp: fix bug in devmap teardown code path

Like the cpumap teardown, the devmap teardown code also flushes
remaining xdp_frames, via bq_xmit_all(), when a map entry is
removed. The code can call xdp_return_frame_rx_napi() from the wrong
context, in case ndo_xdp_xmit() fails.

Fixes: 389ab7f01af9 ("xdp: introduce xdp_return_frame_rx_napi")
Fixes: 735fc4054b3a ("xdp: change ndo_xdp_xmit API to support bulking")
Signed-off-by: Jesper Dangaard Brouer
Signed-off-by: Daniel Borkmann
---
 kernel/bpf/devmap.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d361fc1e3bf3..750d45edae79 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 }
 
 static int bq_xmit_all(struct bpf_dtab_netdev *obj,
-		       struct xdp_bulk_queue *bq, u32 flags)
+		       struct xdp_bulk_queue *bq, u32 flags,
+		       bool in_napi_ctx)
 {
 	struct net_device *dev = obj->dev;
 	int sent = 0, drops = 0, err = 0;
@@ -254,7 +255,10 @@ error:
 		struct xdp_frame *xdpf = bq->q[i];
 
 		/* RX path under NAPI protection, can return frames faster */
-		xdp_return_frame_rx_napi(xdpf);
+		if (likely(in_napi_ctx))
+			xdp_return_frame_rx_napi(xdpf);
+		else
+			xdp_return_frame(xdpf);
 		drops++;
 	}
 	goto out;
@@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map)
 		__clear_bit(bit, bitmap);
 
 		bq = this_cpu_ptr(dev->bulkq);
-		bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+		bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
 	}
 }
 
@@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
 	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-		bq_xmit_all(obj, bq, 0);
+		bq_xmit_all(obj, bq, 0, true);
 
 	/* Ingress dev_rx will be the same for all xdp_frame's in
 	 * bulk_queue, because bq stored per-CPU and must be flushed
@@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 			__clear_bit(dev->bit, bitmap);
 
 			bq = per_cpu_ptr(dev->bulkq, cpu);
-			bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+			bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
 		}
 	}
 }
--
cgit v1.2.3
From be35b982e83a3f2c2cfbb27320066fb8a27168c2 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk
Date: Fri, 10 Aug 2018 15:47:08 +0300
Subject: net: ethernet: ti: cpsw: clear all entries when delete vid

If some of the entries are not found in the forwarding table while
killing a vlan, the remaining unneeded entries are still left in the
table. There is no need to stop at the first failure, as the entry
was deleted anyway, so fix this by returning an error only after
everything has been cleaned. To implement this, return -ENOENT from
cpsw_ale_del_mcast(), as it is supposed to.

Signed-off-by: Ivan Khoronzhuk
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/ti/cpsw.c     | 14 ++++----------
 drivers/net/ethernet/ti/cpsw_ale.c |  2 +-
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 358edab9e72e..9edac671f276 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2125,16 +2125,10 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
 	dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
 	ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0);
-	if (ret != 0)
-		return ret;
-
-	ret = cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
-				 HOST_PORT_NUM, ALE_VLAN, vid);
-	if (ret != 0)
-		return ret;
-
-	ret = cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
-				 0, ALE_VLAN, vid);
+	ret |= cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
+				  HOST_PORT_NUM, ALE_VLAN, vid);
+	ret |= cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
+				  0, ALE_VLAN, vid);
 	pm_runtime_put(cpsw->dev);
 	return ret;
 }
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 93dc05c194d3..5766225a4ce1 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -394,7 +394,7 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
 
 	idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
 	if (idx < 0)
-		return -EINVAL;
+		return -ENOENT;
 
 	cpsw_ale_read(ale, idx, ale_entry);
--
cgit v1.2.3
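
The ret |= idiom above attempts every deletion and still reports
failure if any step failed. One caveat worth knowing: if several
steps fail with different codes, the bitwise OR of two negative
errno values is still nonzero, but not necessarily either original
errno. A small sketch with hypothetical helpers:

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical cleanup steps; each returns 0 or a negative errno. */
    static int del_vlan(void)  { return 0; }
    static int del_ucast(void) { return -ENOENT; }
    static int del_mcast(void) { return 0; }

    int main(void)
    {
        int ret;

        /* Accumulate instead of early-returning, so every entry
         * gets a deletion attempt even if an earlier one was
         * already gone.
         */
        ret  = del_vlan();
        ret |= del_ucast();
        ret |= del_mcast();

        /* Nonzero means at least one step failed; with multiple
         * distinct failures the value is their OR, not a specific
         * errno (e.g. -12 | -22 == -2).
         */
        printf("ret = %d\n", ret);
        return ret ? 1 : 0;
    }
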
From 803c4f64d79d2fced37a0650b94e6c84296dffd6 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk
Date: Fri, 10 Aug 2018 15:47:09 +0300
Subject: net: ethernet: ti: cpsw: fix runtime_pm while add/kill vlan

This should not happen in normal operation, but if an attempt is
made to set a vlan to one of the reserved values, the early return
leaves the cpsw runtime PM unbalanced.

Fixes: a6c5d14f5136 ("drivers: net: cpsw: ndev: fix accessing to suspended device")
Signed-off-by: Ivan Khoronzhuk
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/ti/cpsw.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 9edac671f276..3e34cb8ac1d3 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2086,14 +2086,16 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
 		int i;
 
 		for (i = 0; i < cpsw->data.slaves; i++) {
-			if (vid == cpsw->slaves[i].port_vlan)
-				return -EINVAL;
+			if (vid == cpsw->slaves[i].port_vlan) {
+				ret = -EINVAL;
+				goto err;
+			}
 		}
 	}
 
 	dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
 	ret = cpsw_add_vlan_ale_entry(priv, vid);
-
+err:
 	pm_runtime_put(cpsw->dev);
 	return ret;
 }
@@ -2119,7 +2121,7 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
 
 		for (i = 0; i < cpsw->data.slaves; i++) {
 			if (vid == cpsw->slaves[i].port_vlan)
-				return -EINVAL;
+				goto err;
 		}
 	}
 
@@ -2129,6 +2131,7 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
 				 HOST_PORT_NUM, ALE_VLAN, vid);
 	ret |= cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
 				  0, ALE_VLAN, vid);
+err:
 	pm_runtime_put(cpsw->dev);
 	return ret;
 }
--
cgit v1.2.3
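
The fix follows the classic single-exit pattern for paired
acquire/release calls: any early return between
pm_runtime_get_sync() and pm_runtime_put() leaks a reference, so
error paths jump to a label just before the put instead. A userspace
sketch of the shape, with invented names:

    #include <stdio.h>

    static int refcount;

    /* Stand-ins for pm_runtime_get_sync() / pm_runtime_put(). */
    static int  pm_get(void) { refcount++; return 0; }
    static void pm_put(void) { refcount--; }

    static int add_vid(int vid)
    {
        int ret = 0;

        if (pm_get() < 0)
            return -1;

        if (vid == 100) {   /* a reserved vid, say */
            ret = -1;
            goto err;       /* NOT "return -1": that skips pm_put() */
        }

        printf("adding vid %d\n", vid);
    err:
        pm_put();
        return ret;
    }

    int main(void)
    {
        add_vid(100);
        add_vid(5);
        printf("refcount balanced: %s\n", refcount == 0 ? "yes" : "no");
        return 0;
    }
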
From d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 Mon Sep 17 00:00:00 2001
From: Juergen Gross
Date: Thu, 9 Aug 2018 16:42:16 +0200
Subject: xen/netfront: don't cache skb_shinfo()

skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache
its return value.

Cc: stable@vger.kernel.org
Signed-off-by: Juergen Gross
Reviewed-by: Wei Liu
Signed-off-by: David S. Miller
---
 drivers/net/xen-netfront.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 2d8812dd1534..9dd2ca62d84a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -894,7 +894,6 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 				  struct sk_buff *skb,
 				  struct sk_buff_head *list)
 {
-	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	RING_IDX cons = queue->rx.rsp_cons;
 	struct sk_buff *nskb;
 
@@ -903,15 +902,16 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 			RING_GET_RESPONSE(&queue->rx, ++cons);
 		skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 
-		if (shinfo->nr_frags == MAX_SKB_FRAGS) {
+		if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
 			unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 
 			BUG_ON(pull_to <= skb_headlen(skb));
 			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 		}
-		BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
+		BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 
-		skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				skb_frag_page(nfrag),
 				rx->offset, rx->status, PAGE_SIZE);
 
 		skb_shinfo(nskb)->nr_frags = 0;
--
cgit v1.2.3
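
The bug class fixed here is a cached pointer into memory that a later
call may reallocate: __pskb_pull_tail() can resize the skb, so a
skb_shared_info pointer taken beforehand may dangle afterwards. The
same hazard in plain userspace C, purely as an illustration:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        char *buf = malloc(16);
        if (!buf)
            return 1;
        strcpy(buf, "frags");

        /* BAD: caching a pointer derived from buf... */
        char *cached = buf;

        /* ...then growing the buffer, which may move it (the
         * analogue of __pskb_pull_tail() reshuffling the skb). */
        char *tmp = realloc(buf, 1 << 20);
        if (!tmp) {
            free(buf);
            return 1;
        }
        buf = tmp;

        /* "cached" may now dangle; re-derive from buf instead,
         * just as the fix re-reads skb_shinfo(skb) after the pull. */
        printf("%s\n", buf);    /* OK */
        /* printf("%s\n", cached);  <- use-after-free if realloc moved */

        free(buf);
        return 0;
    }
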