diff options
author | David S. Miller <davem@davemloft.net> | 2019-09-02 21:07:46 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-09-02 21:07:46 +0200 |
commit | 67538eb5c00f08d7fe27f1bb703098b17302bdc0 (patch) | |
tree | 8c79108cdd29d552009e4d09acc23d9e1be51e02 | |
parent | net: dsa: Fix off-by-one number of calls to devlink_port_unregister (diff) | |
parent | mvpp2: percpu buffers (diff) | |
download | linux-67538eb5c00f08d7fe27f1bb703098b17302bdc0.tar.xz linux-67538eb5c00f08d7fe27f1bb703098b17302bdc0.zip |
Merge branch 'mvpp2-per-cpu-buffers'
Matteo Croce says:
====================
mvpp2: per-cpu buffers
This patchset works around a PP2 HW limitation which prevents the use of
per-cpu rx buffers.
The first patch is just a refactor to prepare for the second one.
The second one allocates percpu buffers if the following conditions are met:
- the number of CPUs is less than or equal to 4
- no port is using jumbo frames
If these conditions are not met at load time, or if jumbo frames are enabled
later on, the driver reverts to the shared allocation.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 274 |
2 files changed, 237 insertions, 41 deletions
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index ee3bab508ee8..543a310ec102 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -683,6 +683,7 @@ enum mvpp2_prs_l3_cast { #define MVPP2_BM_SHORT_BUF_NUM 2048 #define MVPP2_BM_POOL_SIZE_MAX (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4) #define MVPP2_BM_POOL_PTR_ALIGN 128 +#define MVPP2_BM_MAX_POOLS 8 /* BM cookie (32 bits) definition */ #define MVPP2_BM_COOKIE_POOL_OFFS 8 @@ -787,6 +788,9 @@ struct mvpp2 { /* Aggregated TXQs */ struct mvpp2_tx_queue *aggr_txqs; + /* Are we using page_pool with per-cpu pools? */ + int percpu_pools; + /* BM pools */ struct mvpp2_bm_pool *bm_pools; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 12e799e99803..111b3b8239e1 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -292,6 +292,26 @@ static void mvpp2_txq_inc_put(struct mvpp2_port *port, txq_pcpu->txq_put_index = 0; } +/* Get number of maximum RXQ */ +static int mvpp2_get_nrxqs(struct mvpp2 *priv) +{ + unsigned int nrxqs; + + if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE) + return 1; + + /* According to the PPv2.2 datasheet and our experiments on + * PPv2.1, RX queues have an allocation granularity of 4 (when + * more than a single one on PPv2.2). + * Round up to nearest multiple of 4. 
+ */ + nrxqs = (num_possible_cpus() + 3) & ~0x3; + if (nrxqs > MVPP2_PORT_MAX_RXQ) + nrxqs = MVPP2_PORT_MAX_RXQ; + + return nrxqs; +} + /* Get number of physical egress port */ static inline int mvpp2_egress_port(struct mvpp2_port *port) { @@ -323,8 +343,7 @@ static void mvpp2_frag_free(const struct mvpp2_bm_pool *pool, void *data) /* Buffer Manager configuration routines */ /* Create pool */ -static int mvpp2_bm_pool_create(struct platform_device *pdev, - struct mvpp2 *priv, +static int mvpp2_bm_pool_create(struct device *dev, struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool, int size) { u32 val; @@ -343,7 +362,7 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev, else bm_pool->size_bytes = 2 * sizeof(u64) * size; - bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, bm_pool->size_bytes, + bm_pool->virt_addr = dma_alloc_coherent(dev, bm_pool->size_bytes, &bm_pool->dma_addr, GFP_KERNEL); if (!bm_pool->virt_addr) @@ -351,9 +370,9 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev, if (!IS_ALIGNED((unsigned long)bm_pool->virt_addr, MVPP2_BM_POOL_PTR_ALIGN)) { - dma_free_coherent(&pdev->dev, bm_pool->size_bytes, + dma_free_coherent(dev, bm_pool->size_bytes, bm_pool->virt_addr, bm_pool->dma_addr); - dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n", + dev_err(dev, "BM pool %d is not %d bytes aligned\n", bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN); return -ENOMEM; } @@ -468,15 +487,14 @@ static int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_p } /* Cleanup pool */ -static int mvpp2_bm_pool_destroy(struct platform_device *pdev, - struct mvpp2 *priv, +static int mvpp2_bm_pool_destroy(struct device *dev, struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool) { int buf_num; u32 val; buf_num = mvpp2_check_hw_buf_num(priv, bm_pool); - mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num); + mvpp2_bm_bufs_free(dev, priv, bm_pool, buf_num); /* Check buffer counters after free */ buf_num = mvpp2_check_hw_buf_num(priv, bm_pool); 
@@ -490,24 +508,26 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev, val |= MVPP2_BM_STOP_MASK; mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val); - dma_free_coherent(&pdev->dev, bm_pool->size_bytes, + dma_free_coherent(dev, bm_pool->size_bytes, bm_pool->virt_addr, bm_pool->dma_addr); return 0; } -static int mvpp2_bm_pools_init(struct platform_device *pdev, - struct mvpp2 *priv) +static int mvpp2_bm_pools_init(struct device *dev, struct mvpp2 *priv) { - int i, err, size; + int i, err, size, poolnum = MVPP2_BM_POOLS_NUM; struct mvpp2_bm_pool *bm_pool; + if (priv->percpu_pools) + poolnum = mvpp2_get_nrxqs(priv) * 2; + /* Create all pools with maximum size */ size = MVPP2_BM_POOL_SIZE_MAX; - for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) { + for (i = 0; i < poolnum; i++) { bm_pool = &priv->bm_pools[i]; bm_pool->id = i; - err = mvpp2_bm_pool_create(pdev, priv, bm_pool, size); + err = mvpp2_bm_pool_create(dev, priv, bm_pool, size); if (err) goto err_unroll_pools; mvpp2_bm_pool_bufsize_set(priv, bm_pool, 0); @@ -515,17 +535,23 @@ static int mvpp2_bm_pools_init(struct platform_device *pdev, return 0; err_unroll_pools: - dev_err(&pdev->dev, "failed to create BM pool %d, size %d\n", i, size); + dev_err(dev, "failed to create BM pool %d, size %d\n", i, size); for (i = i - 1; i >= 0; i--) - mvpp2_bm_pool_destroy(pdev, priv, &priv->bm_pools[i]); + mvpp2_bm_pool_destroy(dev, priv, &priv->bm_pools[i]); return err; } -static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv) +static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) { - int i, err; + int i, err, poolnum = MVPP2_BM_POOLS_NUM; - for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) { + if (priv->percpu_pools) + poolnum = mvpp2_get_nrxqs(priv) * 2; + + dev_info(dev, "using %d %s buffers\n", poolnum, + priv->percpu_pools ? 
"per-cpu" : "shared"); + + for (i = 0; i < poolnum; i++) { /* Mask BM all interrupts */ mvpp2_write(priv, MVPP2_BM_INTR_MASK_REG(i), 0); /* Clear BM cause register */ @@ -533,12 +559,12 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv) } /* Allocate and initialize BM pools */ - priv->bm_pools = devm_kcalloc(&pdev->dev, MVPP2_BM_POOLS_NUM, + priv->bm_pools = devm_kcalloc(dev, poolnum, sizeof(*priv->bm_pools), GFP_KERNEL); if (!priv->bm_pools) return -ENOMEM; - err = mvpp2_bm_pools_init(pdev, priv); + err = mvpp2_bm_pools_init(dev, priv); if (err < 0) return err; return 0; @@ -679,6 +705,13 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port, phys_addr_t phys_addr; void *buf; + if (port->priv->percpu_pools && + bm_pool->pkt_size > MVPP2_BM_LONG_PKT_SIZE) { + netdev_err(port->dev, + "attempted to use jumbo frames with per-cpu pools"); + return 0; + } + buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size); total_size = MVPP2_RX_TOTAL_SIZE(buf_size); @@ -722,7 +755,64 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size) struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool]; int num; - if (pool >= MVPP2_BM_POOLS_NUM) { + if ((port->priv->percpu_pools && pool > mvpp2_get_nrxqs(port->priv) * 2) || + (!port->priv->percpu_pools && pool >= MVPP2_BM_POOLS_NUM)) { + netdev_err(port->dev, "Invalid pool %d\n", pool); + return NULL; + } + + /* Allocate buffers in case BM pool is used as long pool, but packet + * size doesn't match MTU or BM pool hasn't being used yet + */ + if (new_pool->pkt_size == 0) { + int pkts_num; + + /* Set default buffer number or free all the buffers in case + * the pool is not empty + */ + pkts_num = new_pool->buf_num; + if (pkts_num == 0) { + if (port->priv->percpu_pools) { + if (pool < port->nrxqs) + pkts_num = mvpp2_pools[MVPP2_BM_SHORT].buf_num; + else + pkts_num = mvpp2_pools[MVPP2_BM_LONG].buf_num; + } else { + pkts_num = mvpp2_pools[pool].buf_num; + } + } else { + 
mvpp2_bm_bufs_free(port->dev->dev.parent, + port->priv, new_pool, pkts_num); + } + + new_pool->pkt_size = pkt_size; + new_pool->frag_size = + SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) + + MVPP2_SKB_SHINFO_SIZE; + + /* Allocate buffers for this pool */ + num = mvpp2_bm_bufs_add(port, new_pool, pkts_num); + if (num != pkts_num) { + WARN(1, "pool %d: %d of %d allocated\n", + new_pool->id, num, pkts_num); + return NULL; + } + } + + mvpp2_bm_pool_bufsize_set(port->priv, new_pool, + MVPP2_RX_BUF_SIZE(new_pool->pkt_size)); + + return new_pool; +} + +static struct mvpp2_bm_pool * +mvpp2_bm_pool_use_percpu(struct mvpp2_port *port, int type, + unsigned int pool, int pkt_size) +{ + struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool]; + int num; + + if (pool > port->nrxqs * 2) { netdev_err(port->dev, "Invalid pool %d\n", pool); return NULL; } @@ -738,7 +828,7 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size) */ pkts_num = new_pool->buf_num; if (pkts_num == 0) - pkts_num = mvpp2_pools[pool].buf_num; + pkts_num = mvpp2_pools[type].buf_num; else mvpp2_bm_bufs_free(port->dev->dev.parent, port->priv, new_pool, pkts_num); @@ -763,11 +853,11 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size) return new_pool; } -/* Initialize pools for swf */ -static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) +/* Initialize pools for swf, shared buffers variant */ +static int mvpp2_swf_bm_pool_init_shared(struct mvpp2_port *port) { - int rxq; enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool; + int rxq; /* If port pkt_size is higher than 1518B: * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool @@ -811,6 +901,47 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) return 0; } +/* Initialize pools for swf, percpu buffers variant */ +static int mvpp2_swf_bm_pool_init_percpu(struct mvpp2_port *port) +{ + struct mvpp2_bm_pool *p; + int i; + + for (i = 0; i < port->nrxqs; i++) { + p = 
mvpp2_bm_pool_use_percpu(port, MVPP2_BM_SHORT, i, + mvpp2_pools[MVPP2_BM_SHORT].pkt_size); + if (!p) + return -ENOMEM; + + port->priv->bm_pools[i].port_map |= BIT(port->id); + mvpp2_rxq_short_pool_set(port, i, port->priv->bm_pools[i].id); + } + + for (i = 0; i < port->nrxqs; i++) { + p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_LONG, i + port->nrxqs, + mvpp2_pools[MVPP2_BM_LONG].pkt_size); + if (!p) + return -ENOMEM; + + port->priv->bm_pools[i + port->nrxqs].port_map |= BIT(port->id); + mvpp2_rxq_long_pool_set(port, i, + port->priv->bm_pools[i + port->nrxqs].id); + } + + port->pool_long = NULL; + port->pool_short = NULL; + + return 0; +} + +static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) +{ + if (port->priv->percpu_pools) + return mvpp2_swf_bm_pool_init_percpu(port); + else + return mvpp2_swf_bm_pool_init_shared(port); +} + static void mvpp2_set_hw_csum(struct mvpp2_port *port, enum mvpp2_bm_pool_log_num new_long_pool) { @@ -837,6 +968,9 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu) enum mvpp2_bm_pool_log_num new_long_pool; int pkt_size = MVPP2_RX_PKT_SIZE(mtu); + if (port->priv->percpu_pools) + goto out_set; + /* If port MTU is higher than 1518B: * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool @@ -866,6 +1000,7 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu) mvpp2_set_hw_csum(port, new_long_pool); } +out_set: dev->mtu = mtu; dev->wanted_features = dev->features; @@ -3699,10 +3834,48 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p) return err; } +/* Shut down all the ports, reconfigure the pools as percpu or shared, + * then bring up again all ports. 
+ */ +static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu) +{ + int numbufs = MVPP2_BM_POOLS_NUM, i; + struct mvpp2_port *port = NULL; + bool status[MVPP2_MAX_PORTS]; + + for (i = 0; i < priv->port_count; i++) { + port = priv->port_list[i]; + status[i] = netif_running(port->dev); + if (status[i]) + mvpp2_stop(port->dev); + } + + /* nrxqs is the same for all ports */ + if (priv->percpu_pools) + numbufs = port->nrxqs * 2; + + for (i = 0; i < numbufs; i++) + mvpp2_bm_pool_destroy(port->dev->dev.parent, priv, &priv->bm_pools[i]); + + devm_kfree(port->dev->dev.parent, priv->bm_pools); + priv->percpu_pools = percpu; + mvpp2_bm_init(port->dev->dev.parent, priv); + + for (i = 0; i < priv->port_count; i++) { + port = priv->port_list[i]; + mvpp2_swf_bm_pool_init(port); + if (status[i]) + mvpp2_open(port->dev); + } + + return 0; +} + static int mvpp2_change_mtu(struct net_device *dev, int mtu) { struct mvpp2_port *port = netdev_priv(dev); bool running = netif_running(dev); + struct mvpp2 *priv = port->priv; int err; if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) { @@ -3711,6 +3884,31 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu) mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8); } + if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) { + if (priv->percpu_pools) { + netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu); + mvpp2_bm_switch_buffers(priv, false); + } + } else { + bool jumbo = false; + int i; + + for (i = 0; i < priv->port_count; i++) + if (priv->port_list[i] != port && + MVPP2_RX_PKT_SIZE(priv->port_list[i]->dev->mtu) > + MVPP2_BM_LONG_PKT_SIZE) { + jumbo = true; + break; + } + + /* No port is using jumbo frames */ + if (!jumbo) { + dev_info(port->dev->dev.parent, + "all ports have a low MTU, switching to per-cpu buffers"); + mvpp2_bm_switch_buffers(priv, true); + } + } + if (running) mvpp2_stop_dev(port); @@ -5017,18 +5215,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, } ntxqs = MVPP2_MAX_TXQ; - if 
(priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE) { - nrxqs = 1; - } else { - /* According to the PPv2.2 datasheet and our experiments on - * PPv2.1, RX queues have an allocation granularity of 4 (when - * more than a single one on PPv2.2). - * Round up to nearest multiple of 4. - */ - nrxqs = (num_possible_cpus() + 3) & ~0x3; - if (nrxqs > MVPP2_PORT_MAX_RXQ) - nrxqs = MVPP2_PORT_MAX_RXQ; - } + nrxqs = mvpp2_get_nrxqs(priv); dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs); if (!dev) @@ -5190,7 +5377,8 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->features |= NETIF_F_NTUPLE; } - mvpp2_set_hw_csum(port, port->pool_long->id); + if (!port->priv->percpu_pools) + mvpp2_set_hw_csum(port, port->pool_long->id); dev->vlan_features |= features; dev->gso_max_segs = MVPP2_MAX_TSO_SEGS; @@ -5482,7 +5670,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1); /* Buffer Manager initialization */ - err = mvpp2_bm_init(pdev, priv); + err = mvpp2_bm_init(&pdev->dev, priv); if (err < 0) return err; @@ -5568,6 +5756,10 @@ static int mvpp2_probe(struct platform_device *pdev) priv->sysctrl_base = NULL; } + if (priv->hw_version == MVPP22 && + mvpp2_get_nrxqs(priv) * 2 <= MVPP2_BM_MAX_POOLS) + priv->percpu_pools = 1; + mvpp2_setup_bm_pool(); @@ -5749,7 +5941,7 @@ static int mvpp2_remove(struct platform_device *pdev) for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) { struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i]; - mvpp2_bm_pool_destroy(pdev, priv, bm_pool); + mvpp2_bm_pool_destroy(&pdev->dev, priv, bm_pool); } for (i = 0; i < MVPP2_MAX_THREADS; i++) { |