author     David S. Miller <davem@davemloft.net>   2019-07-08 23:58:04 +0200
committer  David S. Miller <davem@davemloft.net>   2019-07-08 23:58:04 +0200
commit     aa6be2b95d4ecfd0e3a433fc3270da98fffab115
tree       a472e36e039968b80e3ebc8301bc92cda9568ad6
parent     macb: fix build warning for !CONFIG_OF
parent     net: ethernet: ti: cpsw: add XDP support
Merge branch 'cpsw-Add-XDP-support'
Ivan Khoronzhuk says:
====================
net: ethernet: ti: cpsw: Add XDP support
This patchset adds XDP support to the TI cpsw driver and bases it on the
page_pool allocator. It was verified with AF_XDP socket drop, AF_XDP l2fwd,
and with eBPF programs returning XDP_DROP, XDP_REDIRECT, XDP_PASS and XDP_TX
(a minimal example of such a program is sketched after the config list below).
The following configs were enabled during verification:
CONFIG_JIT=y
CONFIG_BPFILTER=y
CONFIG_BPF_SYSCALL=y
CONFIG_XDP_SOCKETS=y
CONFIG_BPF_EVENTS=y
CONFIG_HAVE_EBPF_JIT=y
CONFIG_BPF_JIT=y
CONFIG_CGROUP_BPF=y
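For orientation, a minimal XDP program of the kind exercised above could look
like the sketch below. It is illustrative only, not part of this series, and
assumes libbpf's bpf_helpers.h/bpf_endian.h; it drops ARP frames and passes
everything else:

/* Minimal illustrative XDP program: XDP_DROP for ARP, XDP_PASS otherwise. */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int xdp_drop_arp(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	/* bounds check required by the verifier */
	if ((void *)(eth + 1) > data_end)
		return XDP_PASS;

	if (eth->h_proto == bpf_htons(ETH_P_ARP))
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

Once compiled with clang -target bpf, it can be attached to a cpsw interface
with e.g. "ip link set dev eth0 xdp obj xdp_drop_arp.o sec xdp".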
Link to the previous v7:
https://lkml.org/lkml/2019/7/4/715
Regular tests with iperf2 were also run to verify the impact on normal
netstack performance, compared against the base commit:
https://pastebin.com/JSMT0iZ4
v8..v9:
- fix warnings on arm64 caused by typos in type casting
v7..v8:
- corrected dma calculation based on headroom instead of hard start
- minor comment changes
v6..v7:
- rolled back to the v4 solution, but with a small modification
- picked up patch:
https://www.spinics.net/lists/netdev/msg583145.html
- added changes related to netsec fix and cpsw
v5..v6:
- made changes related to rx_dev while keeping the redirect/flush cycle the same
- dropped net: ethernet: ti: davinci_cpdma: return handler status
- other changes are described in the individual patches
v4..v5:
- added two preliminary patches:
net: ethernet: ti: davinci_cpdma: allow desc split while down
net: ethernet: ti: cpsw_ethtool: allow res split while down
- added an xdp allocator refcnt on the xdp level, avoiding the page pool refcnt
- moved flush status as separate argument for cpdma_chan_process
- reworked cpsw code according to last changes to allocator
- added a missing statistic counter
v3..v4:
- added page pool user counter
- use same pool for ndevs in dual mac
- restructured page pool create/destroy according to the latest API changes
v2..v3:
- each rxq and ndev has its own page pool
v1..v2:
- combined xdp_xmit functions
- used page allocation w/o refcnt juggle
- unmapped page for skb netstack
- moved rxq/page pool allocation to open/close pair
- added several preliminary patches:
net: page_pool: add helper function to retrieve dma addresses
net: page_pool: add helper function to unmap dma addresses
net: ethernet: ti: cpsw: use cpsw as drv data
net: ethernet: ti: cpsw_ethtool: simplify slave loops
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   4
 drivers/net/ethernet/socionext/netsec.c           |   8
 drivers/net/ethernet/ti/Kconfig                   |   1
 drivers/net/ethernet/ti/cpsw.c                    | 502
 drivers/net/ethernet/ti/cpsw_ethtool.c            |  57
 drivers/net/ethernet/ti/cpsw_priv.h               |   7
 drivers/net/ethernet/ti/davinci_cpdma.c           | 106
 drivers/net/ethernet/ti/davinci_cpdma.h           |   7
 include/net/page_pool.h                           |  25
 net/core/page_pool.c                              |   8
 net/core/xdp.c                                    |   3
 11 files changed, 640 insertions(+), 88 deletions(-)
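The cpsw side of the diff below builds its RX path on the page_pool API: one
PP_FLAG_DMA_MAP pool per RX channel, with pre-mapped DMA addresses handed to
CPDMA. The sketch below shows the general driver-side pattern under those
assumptions; MY_POOL_SIZE, MY_HEADROOM and the my_* names are placeholders,
not cpsw symbols.

/* Hedged sketch of the driver-side page_pool pattern used in the cpsw
 * changes below; MY_POOL_SIZE, MY_HEADROOM and my_* names are placeholders.
 */
#include <linux/bpf.h>		/* XDP_PACKET_HEADROOM */
#include <linux/dma-mapping.h>
#include <linux/skbuff.h>	/* NET_IP_ALIGN */
#include <net/page_pool.h>

#define MY_POOL_SIZE	256
#define MY_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)

static struct page_pool *my_create_pool(struct device *dev)
{
	struct page_pool_params pp = {
		.order		= 0,			/* one page per RX buffer */
		.flags		= PP_FLAG_DMA_MAP,	/* pool maps pages for the driver */
		.pool_size	= MY_POOL_SIZE,
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
		.dma_dir	= DMA_BIDIRECTIONAL,	/* XDP_TX needs both directions */
	};

	return page_pool_create(&pp);			/* ERR_PTR() on failure */
}

static int my_rx_refill_one(struct page_pool *pool)
{
	struct page *page = page_pool_dev_alloc_pages(pool);
	dma_addr_t dma;

	if (!page)
		return -ENOMEM;

	/* the pool already mapped the page; only the headroom offset is added */
	dma = page_pool_get_dma_addr(page) + MY_HEADROOM;

	/* ... hand (page, dma) to the DMA engine, e.g. a *_submit_mapped() call,
	 * and page_pool_recycle_direct(pool, page) if the submit fails ...
	 */
	(void)dma;
	return 0;
}

In the actual driver below, one such pool is created per RX channel and shared
by both ndevs in dual-MAC mode; the user refcount added to page_pool in this
series is what keeps that sharing safe.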
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 83194d56434d..10efd69de7ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -577,8 +577,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); - if (err) - page_pool_free(rq->page_pool); } if (err) goto err_free; @@ -646,6 +644,7 @@ err_rq_wq_destroy: if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; @@ -680,6 +679,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) } xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); } diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 460777449cd9..d7307ab90d74 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1212,15 +1212,11 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) } } - /* Rx is currently using page_pool - * since the pool is created during netsec_setup_rx_dring(), we need to - * free the pool manually if the registration failed - */ + /* Rx is currently using page_pool */ if (id == NETSEC_RING_RX) { if (xdp_rxq_info_is_reg(&dring->xdp_rxq)) xdp_rxq_info_unreg(&dring->xdp_rxq); - else - page_pool_free(dring->page_pool); + page_pool_destroy(dring->page_pool); } memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM); diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index a800d3417411..834afca3a019 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -50,6 +50,7 @@ config TI_CPSW depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST select TI_DAVINCI_MDIO select MFD_SYSCON + select PAGE_POOL select REGMAP ---help--- This driver supports TI's CPSW Ethernet Switch. diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 32b7b3b74a6b..f16aefd8870b 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -31,6 +31,10 @@ #include <linux/if_vlan.h> #include <linux/kmemleak.h> #include <linux/sys_soc.h> +#include <net/page_pool.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/filter.h> #include <linux/pinctrl/consumer.h> #include <net/pkt_cls.h> @@ -60,6 +64,10 @@ static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT; module_param(descs_pool_size, int, 0444); MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool"); +/* The buf includes headroom compatible with both skb and xdpf */ +#define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN) +#define CPSW_HEADROOM ALIGN(CPSW_HEADROOM_NA, sizeof(long)) + #define for_each_slave(priv, func, arg...) 
\ do { \ struct cpsw_slave *slave; \ @@ -74,6 +82,11 @@ MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool"); (func)(slave++, ##arg); \ } while (0) +#define CPSW_XMETA_OFFSET ALIGN(sizeof(struct xdp_frame), sizeof(long)) + +#define CPSW_XDP_CONSUMED 1 +#define CPSW_XDP_PASS 0 + static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid); @@ -337,24 +350,58 @@ void cpsw_intr_disable(struct cpsw_common *cpsw) return; } +static int cpsw_is_xdpf_handle(void *handle) +{ + return (unsigned long)handle & BIT(0); +} + +static void *cpsw_xdpf_to_handle(struct xdp_frame *xdpf) +{ + return (void *)((unsigned long)xdpf | BIT(0)); +} + +static struct xdp_frame *cpsw_handle_to_xdpf(void *handle) +{ + return (struct xdp_frame *)((unsigned long)handle & ~BIT(0)); +} + +struct __aligned(sizeof(long)) cpsw_meta_xdp { + struct net_device *ndev; + int ch; +}; + void cpsw_tx_handler(void *token, int len, int status) { + struct cpsw_meta_xdp *xmeta; + struct xdp_frame *xdpf; + struct net_device *ndev; struct netdev_queue *txq; - struct sk_buff *skb = token; - struct net_device *ndev = skb->dev; - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct sk_buff *skb; + int ch; + + if (cpsw_is_xdpf_handle(token)) { + xdpf = cpsw_handle_to_xdpf(token); + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET; + ndev = xmeta->ndev; + ch = xmeta->ch; + xdp_return_frame(xdpf); + } else { + skb = token; + ndev = skb->dev; + ch = skb_get_queue_mapping(skb); + cpts_tx_timestamp(ndev_to_cpsw(ndev)->cpts, skb); + dev_kfree_skb_any(skb); + } /* Check whether the queue is stopped due to stalled tx dma, if the * queue is stopped then start the queue as we have free desc for tx */ - txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb)); + txq = netdev_get_tx_queue(ndev, ch); if (unlikely(netif_tx_queue_stopped(txq))) netif_tx_wake_queue(txq); - cpts_tx_timestamp(cpsw->cpts, skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; - dev_kfree_skb_any(skb); } static void cpsw_rx_vlan_encap(struct sk_buff *skb) @@ -400,24 +447,252 @@ static void cpsw_rx_vlan_encap(struct sk_buff *skb) } } +static int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, + struct page *page) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_meta_xdp *xmeta; + struct cpdma_chan *txch; + dma_addr_t dma; + int ret, port; + + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET; + xmeta->ndev = priv->ndev; + xmeta->ch = 0; + txch = cpsw->txv[0].ch; + + port = priv->emac_port + cpsw->data.dual_emac; + if (page) { + dma = page_pool_get_dma_addr(page); + dma += xdpf->headroom + sizeof(struct xdp_frame); + ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf), + dma, xdpf->len, port); + } else { + if (sizeof(*xmeta) > xdpf->headroom) { + xdp_return_frame_rx_napi(xdpf); + return -EINVAL; + } + + ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf), + xdpf->data, xdpf->len, port); + } + + if (ret) { + priv->ndev->stats.tx_dropped++; + xdp_return_frame_rx_napi(xdpf); + } + + return ret; +} + +static int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, + struct page *page) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct net_device *ndev = priv->ndev; + int ret = CPSW_XDP_CONSUMED; + struct xdp_frame *xdpf; + struct bpf_prog *prog; + u32 act; + + rcu_read_lock(); + + prog = READ_ONCE(priv->xdp_prog); + if (!prog) { + ret = CPSW_XDP_PASS; + goto out; + } + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + ret = CPSW_XDP_PASS; + break; + case XDP_TX: + 
xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + goto drop; + + cpsw_xdp_tx_frame(priv, xdpf, page); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(ndev, xdp, prog)) + goto drop; + + /* Have to flush here, per packet, instead of doing it in bulk + * at the end of the napi handler. The RX devices on this + * particular hardware is sharing a common queue, so the + * incoming device might change per packet. + */ + xdp_do_flush_map(); + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + /* fall through -- handle aborts by dropping packet */ + case XDP_DROP: + goto drop; + } +out: + rcu_read_unlock(); + return ret; +drop: + rcu_read_unlock(); + page_pool_recycle_direct(cpsw->page_pool[ch], page); + return ret; +} + +static unsigned int cpsw_rxbuf_total_len(unsigned int len) +{ + len += CPSW_HEADROOM; + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + return SKB_DATA_ALIGN(len); +} + +static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw, + int size) +{ + struct page_pool_params pp_params; + struct page_pool *pool; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = size; + pp_params.nid = NUMA_NO_NODE; + pp_params.dma_dir = DMA_BIDIRECTIONAL; + pp_params.dev = cpsw->dev; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) + dev_err(cpsw->dev, "cannot create rx page pool\n"); + + return pool; +} + +static int cpsw_ndev_create_xdp_rxq(struct cpsw_priv *priv, int ch) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct xdp_rxq_info *rxq; + struct page_pool *pool; + int ret; + + pool = cpsw->page_pool[ch]; + rxq = &priv->xdp_rxq[ch]; + + ret = xdp_rxq_info_reg(rxq, priv->ndev, ch); + if (ret) + return ret; + + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); + if (ret) + xdp_rxq_info_unreg(rxq); + + return ret; +} + +static void cpsw_ndev_destroy_xdp_rxq(struct cpsw_priv *priv, int ch) +{ + struct xdp_rxq_info *rxq = &priv->xdp_rxq[ch]; + + if (!xdp_rxq_info_is_reg(rxq)) + return; + + xdp_rxq_info_unreg(rxq); +} + +static int cpsw_create_rx_pool(struct cpsw_common *cpsw, int ch) +{ + struct page_pool *pool; + int ret = 0, pool_size; + + pool_size = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch); + pool = cpsw_create_page_pool(cpsw, pool_size); + if (IS_ERR(pool)) + ret = PTR_ERR(pool); + else + cpsw->page_pool[ch] = pool; + + return ret; +} + +void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i, ch; + + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!ndev) + continue; + + cpsw_ndev_destroy_xdp_rxq(netdev_priv(ndev), ch); + } + + page_pool_destroy(cpsw->page_pool[ch]); + cpsw->page_pool[ch] = NULL; + } +} + +int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i, ch, ret; + + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + ret = cpsw_create_rx_pool(cpsw, ch); + if (ret) + goto err_cleanup; + + /* using same page pool is allowed as no running rx handlers + * simultaneously for both ndevs + */ + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!ndev) + continue; + + ret = cpsw_ndev_create_xdp_rxq(netdev_priv(ndev), ch); + if (ret) + goto err_cleanup; + } + } + + return 0; + +err_cleanup: + cpsw_destroy_xdp_rxqs(cpsw); + + return ret; +} + static void cpsw_rx_handler(void *token, int len, int status) { - struct cpdma_chan *ch; - struct sk_buff 
*skb = token; - struct sk_buff *new_skb; - struct net_device *ndev = skb->dev; - int ret = 0, port; - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct page *new_page, *page = token; + void *pa = page_address(page); + struct cpsw_meta_xdp *xmeta = pa + CPSW_XMETA_OFFSET; + struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev); + int pkt_size = cpsw->rx_packet_max; + int ret = 0, port, ch = xmeta->ch; + int headroom = CPSW_HEADROOM; + struct net_device *ndev = xmeta->ndev; struct cpsw_priv *priv; + struct page_pool *pool; + struct sk_buff *skb; + struct xdp_buff xdp; + dma_addr_t dma; - if (cpsw->data.dual_emac) { + if (cpsw->data.dual_emac && status >= 0) { port = CPDMA_RX_SOURCE_PORT(status); - if (port) { + if (port) ndev = cpsw->slaves[--port].ndev; - skb->dev = ndev; - } } + priv = netdev_priv(ndev); + pool = cpsw->page_pool[ch]; if (unlikely(status < 0) || unlikely(!netif_running(ndev))) { /* In dual emac mode check for all interfaces */ if (cpsw->data.dual_emac && cpsw->usage_count && @@ -426,43 +701,87 @@ static void cpsw_rx_handler(void *token, int len, int status) * is already down and the other interface is up * and running, instead of freeing which results * in reducing of the number of rx descriptor in - * DMA engine, requeue skb back to cpdma. + * DMA engine, requeue page back to cpdma. */ - new_skb = skb; + new_page = page; goto requeue; } - /* the interface is going down, skbs are purged */ - dev_kfree_skb_any(skb); + /* the interface is going down, pages are purged */ + page_pool_recycle_direct(pool, page); return; } - new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max); - if (new_skb) { - skb_copy_queue_mapping(new_skb, skb); - skb_put(skb, len); - if (status & CPDMA_RX_VLAN_ENCAP) - cpsw_rx_vlan_encap(skb); - priv = netdev_priv(ndev); - if (priv->rx_ts_enabled) - cpts_rx_timestamp(cpsw->cpts, skb); - skb->protocol = eth_type_trans(skb, ndev); - netif_receive_skb(skb); - ndev->stats.rx_bytes += len; - ndev->stats.rx_packets++; - kmemleak_not_leak(new_skb); - } else { + new_page = page_pool_dev_alloc_pages(pool); + if (unlikely(!new_page)) { + new_page = page; ndev->stats.rx_dropped++; - new_skb = skb; + goto requeue; + } + + if (priv->xdp_prog) { + if (status & CPDMA_RX_VLAN_ENCAP) { + xdp.data = pa + CPSW_HEADROOM + + CPSW_RX_VLAN_ENCAP_HDR_SIZE; + xdp.data_end = xdp.data + len - + CPSW_RX_VLAN_ENCAP_HDR_SIZE; + } else { + xdp.data = pa + CPSW_HEADROOM; + xdp.data_end = xdp.data + len; + } + + xdp_set_data_meta_invalid(&xdp); + + xdp.data_hard_start = pa; + xdp.rxq = &priv->xdp_rxq[ch]; + + ret = cpsw_run_xdp(priv, ch, &xdp, page); + if (ret != CPSW_XDP_PASS) + goto requeue; + + /* XDP prog might have changed packet data and boundaries */ + len = xdp.data_end - xdp.data; + headroom = xdp.data - xdp.data_hard_start; + + /* XDP prog can modify vlan tag, so can't use encap header */ + status &= ~CPDMA_RX_VLAN_ENCAP; } + /* pass skb to netstack if no XDP prog or returned XDP_PASS */ + skb = build_skb(pa, cpsw_rxbuf_total_len(pkt_size)); + if (!skb) { + ndev->stats.rx_dropped++; + page_pool_recycle_direct(pool, page); + goto requeue; + } + + skb_reserve(skb, headroom); + skb_put(skb, len); + skb->dev = ndev; + if (status & CPDMA_RX_VLAN_ENCAP) + cpsw_rx_vlan_encap(skb); + if (priv->rx_ts_enabled) + cpts_rx_timestamp(cpsw->cpts, skb); + skb->protocol = eth_type_trans(skb, ndev); + + /* unmap page as no netstack skb page recycling */ + page_pool_release_page(pool, page); + netif_receive_skb(skb); + + ndev->stats.rx_bytes += len; + ndev->stats.rx_packets++; + 
requeue: - ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch; - ret = cpdma_chan_submit(ch, new_skb, new_skb->data, - skb_tailroom(new_skb), 0); + xmeta = page_address(new_page) + CPSW_XMETA_OFFSET; + xmeta->ndev = ndev; + xmeta->ch = ch; + + dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM; + ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma, + pkt_size, 0); if (ret < 0) { WARN_ON(ret == -ENOMEM); - dev_kfree_skb_any(new_skb); + page_pool_recycle_direct(pool, new_page); } } @@ -1032,33 +1351,39 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) int cpsw_fill_rx_channels(struct cpsw_priv *priv) { struct cpsw_common *cpsw = priv->cpsw; - struct sk_buff *skb; + struct cpsw_meta_xdp *xmeta; + struct page_pool *pool; + struct page *page; int ch_buf_num; int ch, i, ret; + dma_addr_t dma; for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + pool = cpsw->page_pool[ch]; ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch); for (i = 0; i < ch_buf_num; i++) { - skb = __netdev_alloc_skb_ip_align(priv->ndev, - cpsw->rx_packet_max, - GFP_KERNEL); - if (!skb) { - cpsw_err(priv, ifup, "cannot allocate skb\n"); + page = page_pool_dev_alloc_pages(pool); + if (!page) { + cpsw_err(priv, ifup, "allocate rx page err\n"); return -ENOMEM; } - skb_set_queue_mapping(skb, ch); - ret = cpdma_chan_idle_submit(cpsw->rxv[ch].ch, skb, - skb->data, - skb_tailroom(skb), 0); + xmeta = page_address(page) + CPSW_XMETA_OFFSET; + xmeta->ndev = priv->ndev; + xmeta->ch = ch; + + dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM; + ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch, + page, dma, + cpsw->rx_packet_max, + 0); if (ret < 0) { cpsw_err(priv, ifup, - "cannot submit skb to channel %d rx, error %d\n", + "cannot submit page to channel %d rx, error %d\n", ch, ret); - kfree_skb(skb); + page_pool_recycle_direct(pool, page); return ret; } - kmemleak_not_leak(skb); } cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n", @@ -1394,6 +1719,13 @@ static int cpsw_ndo_open(struct net_device *ndev) enable_irq(cpsw->irqs_table[0]); } + /* create rxqs for both infs in dual mac as they use same pool + * and must be destroyed together when no users. 
+ */ + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret < 0) + goto err_cleanup; + ret = cpsw_fill_rx_channels(priv); if (ret < 0) goto err_cleanup; @@ -1422,9 +1754,10 @@ static int cpsw_ndo_open(struct net_device *ndev) err_cleanup: if (!cpsw->usage_count) { cpdma_ctlr_stop(cpsw->dma); - for_each_slave(priv, cpsw_slave_stop, cpsw); + cpsw_destroy_xdp_rxqs(cpsw); } + for_each_slave(priv, cpsw_slave_stop, cpsw); pm_runtime_put_sync(cpsw->dev); netif_carrier_off(priv->ndev); return ret; @@ -1447,6 +1780,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) cpsw_intr_disable(cpsw); cpdma_ctlr_stop(cpsw->dma); cpsw_ale_stop(cpsw->ale); + cpsw_destroy_xdp_rxqs(cpsw); } for_each_slave(priv, cpsw_slave_stop, cpsw); @@ -2004,6 +2338,64 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, } } +static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog; + + if (!priv->xdpi.prog && !prog) + return 0; + + if (!xdp_attachment_flags_ok(&priv->xdpi, bpf)) + return -EBUSY; + + WRITE_ONCE(priv->xdp_prog, prog); + + xdp_attachment_setup(&priv->xdpi, bpf); + + return 0; +} + +static int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return cpsw_xdp_prog_setup(priv, bpf); + + case XDP_QUERY_PROG: + return xdp_attachment_query(&priv->xdpi, bpf); + + default: + return -EINVAL; + } +} + +static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct xdp_frame *xdpf; + int i, drops = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + for (i = 0; i < n; i++) { + xdpf = frames[i]; + if (xdpf->len < CPSW_MIN_PACKET_SIZE) { + xdp_return_frame_rx_napi(xdpf); + drops++; + continue; + } + + if (cpsw_xdp_tx_frame(priv, xdpf, NULL)) + drops++; + } + + return n - drops; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void cpsw_ndo_poll_controller(struct net_device *ndev) { @@ -2032,6 +2424,8 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid, .ndo_setup_tc = cpsw_ndo_setup_tc, + .ndo_bpf = cpsw_ndo_bpf, + .ndo_xdp_xmit = cpsw_ndo_xdp_xmit, }; static void cpsw_get_drvinfo(struct net_device *ndev, diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index f60dc1dfc443..31248a6cc642 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -578,6 +578,18 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx, return 0; } +static void cpsw_fail(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i; + + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (ndev) + dev_close(ndev); + } +} + int cpsw_set_channels_common(struct net_device *ndev, struct ethtool_channels *chs, cpdma_handler_fn rx_handler) @@ -585,7 +597,7 @@ int cpsw_set_channels_common(struct net_device *ndev, struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; struct net_device *sl_ndev; - int i, ret; + int i, new_pools, ret; ret = cpsw_check_ch_settings(cpsw, chs); if (ret < 0) @@ -593,6 +605,8 @@ int cpsw_set_channels_common(struct net_device *ndev, cpsw_suspend_data_pass(ndev); + new_pools = (chs->rx_count != cpsw->rx_ch_num) && cpsw->usage_count; + ret = 
cpsw_update_channels_res(priv, chs->rx_count, 1, rx_handler); if (ret) goto err; @@ -620,15 +634,21 @@ int cpsw_set_channels_common(struct net_device *ndev, } } - if (cpsw->usage_count) - cpsw_split_res(cpsw); + cpsw_split_res(cpsw); + + if (new_pools) { + cpsw_destroy_xdp_rxqs(cpsw); + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret) + goto err; + } ret = cpsw_resume_data_pass(ndev); if (!ret) return 0; err: dev_err(priv->dev, "cannot update channels number, closing device\n"); - dev_close(ndev); + cpsw_fail(cpsw); return ret; } @@ -648,9 +668,8 @@ void cpsw_get_ringparam(struct net_device *ndev, int cpsw_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *ering) { - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_common *cpsw = priv->cpsw; - int ret; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + int descs_num, ret; /* ignore ering->tx_pending - only rx_pending adjustment is supported */ @@ -659,22 +678,34 @@ int cpsw_set_ringparam(struct net_device *ndev, ering->rx_pending > (cpsw->descs_pool_size - CPSW_MAX_QUEUES)) return -EINVAL; - if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma)) + descs_num = cpdma_get_num_rx_descs(cpsw->dma); + if (ering->rx_pending == descs_num) return 0; cpsw_suspend_data_pass(ndev); - cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); + ret = cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); + if (ret) { + if (cpsw_resume_data_pass(ndev)) + goto err; + + return ret; + } - if (cpsw->usage_count) - cpdma_chan_split_pool(cpsw->dma); + if (cpsw->usage_count) { + cpsw_destroy_xdp_rxqs(cpsw); + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret) + goto err; + } ret = cpsw_resume_data_pass(ndev); if (!ret) return 0; - +err: + cpdma_set_num_rx_descs(cpsw->dma, descs_num); dev_err(cpsw->dev, "cannot set ring params, closing device\n"); - dev_close(ndev); + cpsw_fail(cpsw); return ret; } diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 04795b97ee71..086bf38a4736 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -346,6 +346,7 @@ struct cpsw_common { int rx_ch_num, tx_ch_num; int speed; int usage_count; + struct page_pool *page_pool[CPSW_MAX_QUEUES]; }; struct cpsw_priv { @@ -360,6 +361,10 @@ struct cpsw_priv { int shp_cfg_speed; int tx_ts_enabled; int rx_ts_enabled; + struct bpf_prog *xdp_prog; + struct xdp_rxq_info xdp_rxq[CPSW_MAX_QUEUES]; + struct xdp_attachment_info xdpi; + u32 emac_port; struct cpsw_common *cpsw; }; @@ -391,6 +396,8 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv); void cpsw_intr_enable(struct cpsw_common *cpsw); void cpsw_intr_disable(struct cpsw_common *cpsw); void cpsw_tx_handler(void *token, int len, int status); +int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw); +void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw); /* ethtool */ u32 cpsw_get_msglevel(struct net_device *ndev); diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 5cf1758d425b..0ca2a1a254de 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -139,6 +139,7 @@ struct submit_info { int directed; void *token; void *data; + int flags; int len; }; @@ -184,6 +185,8 @@ static struct cpdma_control_info controls[] = { (directed << CPDMA_TO_PORT_SHIFT)); \ } while (0) +#define CPDMA_DMA_EXT_MAP BIT(16) + static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr) { struct cpdma_desc_pool *pool = ctlr->pool; @@ -1015,6 +1018,7 @@ static int cpdma_chan_submit_si(struct 
submit_info *si) struct cpdma_chan *chan = si->chan; struct cpdma_ctlr *ctlr = chan->ctlr; int len = si->len; + int swlen = len; struct cpdma_desc __iomem *desc; dma_addr_t buffer; u32 mode; @@ -1036,16 +1040,22 @@ static int cpdma_chan_submit_si(struct submit_info *si) chan->stats.runt_transmit_buff++; } - buffer = dma_map_single(ctlr->dev, si->data, len, chan->dir); - ret = dma_mapping_error(ctlr->dev, buffer); - if (ret) { - cpdma_desc_free(ctlr->pool, desc, 1); - return -EINVAL; - } - mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP; cpdma_desc_to_port(chan, mode, si->directed); + if (si->flags & CPDMA_DMA_EXT_MAP) { + buffer = (dma_addr_t)si->data; + dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir); + swlen |= CPDMA_DMA_EXT_MAP; + } else { + buffer = dma_map_single(ctlr->dev, si->data, len, chan->dir); + ret = dma_mapping_error(ctlr->dev, buffer); + if (ret) { + cpdma_desc_free(ctlr->pool, desc, 1); + return -EINVAL; + } + } + /* Relaxed IO accessors can be used here as there is read barrier * at the end of write sequence. */ @@ -1055,7 +1065,7 @@ static int cpdma_chan_submit_si(struct submit_info *si) writel_relaxed(mode | len, &desc->hw_mode); writel_relaxed((uintptr_t)si->token, &desc->sw_token); writel_relaxed(buffer, &desc->sw_buffer); - writel_relaxed(len, &desc->sw_len); + writel_relaxed(swlen, &desc->sw_len); desc_read(desc, sw_len); __cpdma_chan_submit(chan, desc); @@ -1079,6 +1089,32 @@ int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, si.data = data; si.len = len; si.directed = directed; + si.flags = 0; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state == CPDMA_STATE_TEARDOWN) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = (void *)data; + si.len = len; + si.directed = directed; + si.flags = CPDMA_DMA_EXT_MAP; spin_lock_irqsave(&chan->lock, flags); if (chan->state == CPDMA_STATE_TEARDOWN) { @@ -1103,6 +1139,32 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, si.data = data; si.len = len; si.directed = directed; + si.flags = 0; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state != CPDMA_STATE_ACTIVE) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = (void *)data; + si.len = len; + si.directed = directed; + si.flags = CPDMA_DMA_EXT_MAP; spin_lock_irqsave(&chan->lock, flags); if (chan->state != CPDMA_STATE_ACTIVE) { @@ -1140,10 +1202,17 @@ static void __cpdma_chan_free(struct cpdma_chan *chan, uintptr_t token; token = desc_read(desc, sw_token); - buff_dma = desc_read(desc, sw_buffer); origlen = desc_read(desc, sw_len); - dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir); + buff_dma = desc_read(desc, sw_buffer); + if (origlen & CPDMA_DMA_EXT_MAP) { + origlen &= ~CPDMA_DMA_EXT_MAP; + dma_sync_single_for_cpu(ctlr->dev, buff_dma, origlen, + chan->dir); + } else { + dma_unmap_single(ctlr->dev, 
buff_dma, origlen, chan->dir); + } + cpdma_desc_free(pool, desc, 1); (*chan->handler)((void *)token, outlen, status); } @@ -1354,8 +1423,23 @@ int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr) return ctlr->num_tx_desc; } -void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) +int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) { + unsigned long flags; + int temp, ret; + + spin_lock_irqsave(&ctlr->lock, flags); + + temp = ctlr->num_rx_desc; ctlr->num_rx_desc = num_rx_desc; ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc; + ret = cpdma_chan_split_pool(ctlr); + if (ret) { + ctlr->num_rx_desc = temp; + ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc; + } + + spin_unlock_irqrestore(&ctlr->lock, flags); + + return ret; } diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 9343c8c73c1b..d3cfe234d16a 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -77,8 +77,12 @@ int cpdma_chan_stop(struct cpdma_chan *chan); int cpdma_chan_get_stats(struct cpdma_chan *chan, struct cpdma_chan_stats *stats); +int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed); int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, int len, int directed); +int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed); int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, int len, int directed); int cpdma_chan_process(struct cpdma_chan *chan, int quota); @@ -112,8 +116,7 @@ enum cpdma_control { int cpdma_control_get(struct cpdma_ctlr *ctlr, int control); int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value); int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr); -void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc); +int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc); int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr); -int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr); #endif diff --git a/include/net/page_pool.h b/include/net/page_pool.h index ee9c871d2043..2cbcdbdec254 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -101,6 +101,12 @@ struct page_pool { struct ptr_ring ring; atomic_t pages_state_release_cnt; + + /* A page_pool is strictly tied to a single RX-queue being + * protected by NAPI, due to above pp_alloc_cache. This + * refcnt serves purpose is to simplify drivers error handling. 
+ */ + refcount_t user_cnt; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); @@ -134,6 +140,15 @@ static inline void page_pool_free(struct page_pool *pool) #endif } +/* Drivers use this instead of page_pool_free */ +static inline void page_pool_destroy(struct page_pool *pool) +{ + if (!pool) + return; + + page_pool_free(pool); +} + /* Never call this directly, use helpers below */ void __page_pool_put_page(struct page_pool *pool, struct page *page, bool allow_direct); @@ -201,4 +216,14 @@ static inline bool is_page_pool_compiled_in(void) #endif } +static inline void page_pool_get(struct page_pool *pool) +{ + refcount_inc(&pool->user_cnt); +} + +static inline bool page_pool_put(struct page_pool *pool) +{ + return refcount_dec_and_test(&pool->user_cnt); +} + #endif /* _NET_PAGE_POOL_H */ diff --git a/net/core/page_pool.c b/net/core/page_pool.c index b366f59885c1..3272dc7a8c81 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -49,6 +49,9 @@ static int page_pool_init(struct page_pool *pool, atomic_set(&pool->pages_state_release_cnt, 0); + /* Driver calling page_pool_create() also call page_pool_destroy() */ + refcount_set(&pool->user_cnt, 1); + if (pool->p.flags & PP_FLAG_DMA_MAP) get_device(pool->p.dev); @@ -70,6 +73,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) kfree(pool); return ERR_PTR(err); } + return pool; } EXPORT_SYMBOL(page_pool_create); @@ -356,6 +360,10 @@ static void __warn_in_flight(struct page_pool *pool) void __page_pool_free(struct page_pool *pool) { + /* Only last user actually free/release resources */ + if (!page_pool_put(pool)) + return; + WARN(pool->alloc.count, "API usage violation"); WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty"); diff --git a/net/core/xdp.c b/net/core/xdp.c index 829377cc83db..d7bf62ffbb5e 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -370,6 +370,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, goto err; } + if (type == MEM_TYPE_PAGE_POOL) + page_pool_get(xdp_alloc->page_pool); + mutex_unlock(&mem_id_lock); trace_mem_connect(xdp_alloc, xdp_rxq); |
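A note on the refcounting introduced above: page_pool_create() now leaves the
driver holding one user reference, and xdp_rxq_info_reg_mem_model() takes
another, so driver teardown reduces to the hedged sketch below (my_rx_teardown
is a placeholder, not a symbol from this series); whichever put happens last
actually releases the pool.

/* Hedged teardown sketch for the user-refcount semantics added above. */
static void my_rx_teardown(struct xdp_rxq_info *rxq, struct page_pool *pool)
{
	/* releases (deferred internally) the mem-model reference taken at
	 * registration time
	 */
	if (xdp_rxq_info_is_reg(rxq))
		xdp_rxq_info_unreg(rxq);

	/* releases the driver's own reference from page_pool_create();
	 * the pool is freed only once the last user reference is gone,
	 * so this is safe whether or not the mem model was registered
	 */
	page_pool_destroy(pool);
}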