summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-09-02 21:07:46 +0200
committerDavid S. Miller <davem@davemloft.net>2019-09-02 21:07:46 +0200
commit67538eb5c00f08d7fe27f1bb703098b17302bdc0 (patch)
tree8c79108cdd29d552009e4d09acc23d9e1be51e02
parentnet: dsa: Fix off-by-one number of calls to devlink_port_unregister (diff)
parentmvpp2: percpu buffers (diff)
downloadlinux-67538eb5c00f08d7fe27f1bb703098b17302bdc0.tar.xz
linux-67538eb5c00f08d7fe27f1bb703098b17302bdc0.zip
Merge branch 'mvpp2-per-cpu-buffers'
Matteo Croce says: ==================== mvpp2: per-cpu buffers This patchset workarounds an PP2 HW limitation which prevents to use per-cpu rx buffers. The first patch is just a refactor to prepare for the second one. The second one allocates percpu buffers if the following conditions are met: - CPU number is less or equal 4 - no port is using jumbo frames If the following conditions are not met at load time, of jumbo frame is enabled later on, the shared allocation is reverted. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2.h4
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c274
2 files changed, 237 insertions, 41 deletions
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index ee3bab508ee8..543a310ec102 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -683,6 +683,7 @@ enum mvpp2_prs_l3_cast {
#define MVPP2_BM_SHORT_BUF_NUM 2048
#define MVPP2_BM_POOL_SIZE_MAX (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
#define MVPP2_BM_POOL_PTR_ALIGN 128
+#define MVPP2_BM_MAX_POOLS 8
/* BM cookie (32 bits) definition */
#define MVPP2_BM_COOKIE_POOL_OFFS 8
@@ -787,6 +788,9 @@ struct mvpp2 {
/* Aggregated TXQs */
struct mvpp2_tx_queue *aggr_txqs;
+ /* Are we using page_pool with per-cpu pools? */
+ int percpu_pools;
+
/* BM pools */
struct mvpp2_bm_pool *bm_pools;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 12e799e99803..111b3b8239e1 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -292,6 +292,26 @@ static void mvpp2_txq_inc_put(struct mvpp2_port *port,
txq_pcpu->txq_put_index = 0;
}
+/* Get number of maximum RXQ */
+static int mvpp2_get_nrxqs(struct mvpp2 *priv)
+{
+ unsigned int nrxqs;
+
+ if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE)
+ return 1;
+
+ /* According to the PPv2.2 datasheet and our experiments on
+ * PPv2.1, RX queues have an allocation granularity of 4 (when
+ * more than a single one on PPv2.2).
+ * Round up to nearest multiple of 4.
+ */
+ nrxqs = (num_possible_cpus() + 3) & ~0x3;
+ if (nrxqs > MVPP2_PORT_MAX_RXQ)
+ nrxqs = MVPP2_PORT_MAX_RXQ;
+
+ return nrxqs;
+}
+
/* Get number of physical egress port */
static inline int mvpp2_egress_port(struct mvpp2_port *port)
{
@@ -323,8 +343,7 @@ static void mvpp2_frag_free(const struct mvpp2_bm_pool *pool, void *data)
/* Buffer Manager configuration routines */
/* Create pool */
-static int mvpp2_bm_pool_create(struct platform_device *pdev,
- struct mvpp2 *priv,
+static int mvpp2_bm_pool_create(struct device *dev, struct mvpp2 *priv,
struct mvpp2_bm_pool *bm_pool, int size)
{
u32 val;
@@ -343,7 +362,7 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
else
bm_pool->size_bytes = 2 * sizeof(u64) * size;
- bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, bm_pool->size_bytes,
+ bm_pool->virt_addr = dma_alloc_coherent(dev, bm_pool->size_bytes,
&bm_pool->dma_addr,
GFP_KERNEL);
if (!bm_pool->virt_addr)
@@ -351,9 +370,9 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
if (!IS_ALIGNED((unsigned long)bm_pool->virt_addr,
MVPP2_BM_POOL_PTR_ALIGN)) {
- dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+ dma_free_coherent(dev, bm_pool->size_bytes,
bm_pool->virt_addr, bm_pool->dma_addr);
- dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
+ dev_err(dev, "BM pool %d is not %d bytes aligned\n",
bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN);
return -ENOMEM;
}
@@ -468,15 +487,14 @@ static int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_p
}
/* Cleanup pool */
-static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
- struct mvpp2 *priv,
+static int mvpp2_bm_pool_destroy(struct device *dev, struct mvpp2 *priv,
struct mvpp2_bm_pool *bm_pool)
{
int buf_num;
u32 val;
buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
- mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
+ mvpp2_bm_bufs_free(dev, priv, bm_pool, buf_num);
/* Check buffer counters after free */
buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
@@ -490,24 +508,26 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
val |= MVPP2_BM_STOP_MASK;
mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
- dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+ dma_free_coherent(dev, bm_pool->size_bytes,
bm_pool->virt_addr,
bm_pool->dma_addr);
return 0;
}
-static int mvpp2_bm_pools_init(struct platform_device *pdev,
- struct mvpp2 *priv)
+static int mvpp2_bm_pools_init(struct device *dev, struct mvpp2 *priv)
{
- int i, err, size;
+ int i, err, size, poolnum = MVPP2_BM_POOLS_NUM;
struct mvpp2_bm_pool *bm_pool;
+ if (priv->percpu_pools)
+ poolnum = mvpp2_get_nrxqs(priv) * 2;
+
/* Create all pools with maximum size */
size = MVPP2_BM_POOL_SIZE_MAX;
- for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+ for (i = 0; i < poolnum; i++) {
bm_pool = &priv->bm_pools[i];
bm_pool->id = i;
- err = mvpp2_bm_pool_create(pdev, priv, bm_pool, size);
+ err = mvpp2_bm_pool_create(dev, priv, bm_pool, size);
if (err)
goto err_unroll_pools;
mvpp2_bm_pool_bufsize_set(priv, bm_pool, 0);
@@ -515,17 +535,23 @@ static int mvpp2_bm_pools_init(struct platform_device *pdev,
return 0;
err_unroll_pools:
- dev_err(&pdev->dev, "failed to create BM pool %d, size %d\n", i, size);
+ dev_err(dev, "failed to create BM pool %d, size %d\n", i, size);
for (i = i - 1; i >= 0; i--)
- mvpp2_bm_pool_destroy(pdev, priv, &priv->bm_pools[i]);
+ mvpp2_bm_pool_destroy(dev, priv, &priv->bm_pools[i]);
return err;
}
-static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
+static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
{
- int i, err;
+ int i, err, poolnum = MVPP2_BM_POOLS_NUM;
- for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+ if (priv->percpu_pools)
+ poolnum = mvpp2_get_nrxqs(priv) * 2;
+
+ dev_info(dev, "using %d %s buffers\n", poolnum,
+ priv->percpu_pools ? "per-cpu" : "shared");
+
+ for (i = 0; i < poolnum; i++) {
/* Mask BM all interrupts */
mvpp2_write(priv, MVPP2_BM_INTR_MASK_REG(i), 0);
/* Clear BM cause register */
@@ -533,12 +559,12 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
}
/* Allocate and initialize BM pools */
- priv->bm_pools = devm_kcalloc(&pdev->dev, MVPP2_BM_POOLS_NUM,
+ priv->bm_pools = devm_kcalloc(dev, poolnum,
sizeof(*priv->bm_pools), GFP_KERNEL);
if (!priv->bm_pools)
return -ENOMEM;
- err = mvpp2_bm_pools_init(pdev, priv);
+ err = mvpp2_bm_pools_init(dev, priv);
if (err < 0)
return err;
return 0;
@@ -679,6 +705,13 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
phys_addr_t phys_addr;
void *buf;
+ if (port->priv->percpu_pools &&
+ bm_pool->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
+ netdev_err(port->dev,
+ "attempted to use jumbo frames with per-cpu pools");
+ return 0;
+ }
+
buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size);
total_size = MVPP2_RX_TOTAL_SIZE(buf_size);
@@ -722,7 +755,64 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
int num;
- if (pool >= MVPP2_BM_POOLS_NUM) {
+ if ((port->priv->percpu_pools && pool > mvpp2_get_nrxqs(port->priv) * 2) ||
+ (!port->priv->percpu_pools && pool >= MVPP2_BM_POOLS_NUM)) {
+ netdev_err(port->dev, "Invalid pool %d\n", pool);
+ return NULL;
+ }
+
+ /* Allocate buffers in case BM pool is used as long pool, but packet
+ * size doesn't match MTU or BM pool hasn't being used yet
+ */
+ if (new_pool->pkt_size == 0) {
+ int pkts_num;
+
+ /* Set default buffer number or free all the buffers in case
+ * the pool is not empty
+ */
+ pkts_num = new_pool->buf_num;
+ if (pkts_num == 0) {
+ if (port->priv->percpu_pools) {
+ if (pool < port->nrxqs)
+ pkts_num = mvpp2_pools[MVPP2_BM_SHORT].buf_num;
+ else
+ pkts_num = mvpp2_pools[MVPP2_BM_LONG].buf_num;
+ } else {
+ pkts_num = mvpp2_pools[pool].buf_num;
+ }
+ } else {
+ mvpp2_bm_bufs_free(port->dev->dev.parent,
+ port->priv, new_pool, pkts_num);
+ }
+
+ new_pool->pkt_size = pkt_size;
+ new_pool->frag_size =
+ SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
+ MVPP2_SKB_SHINFO_SIZE;
+
+ /* Allocate buffers for this pool */
+ num = mvpp2_bm_bufs_add(port, new_pool, pkts_num);
+ if (num != pkts_num) {
+ WARN(1, "pool %d: %d of %d allocated\n",
+ new_pool->id, num, pkts_num);
+ return NULL;
+ }
+ }
+
+ mvpp2_bm_pool_bufsize_set(port->priv, new_pool,
+ MVPP2_RX_BUF_SIZE(new_pool->pkt_size));
+
+ return new_pool;
+}
+
+static struct mvpp2_bm_pool *
+mvpp2_bm_pool_use_percpu(struct mvpp2_port *port, int type,
+ unsigned int pool, int pkt_size)
+{
+ struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
+ int num;
+
+ if (pool > port->nrxqs * 2) {
netdev_err(port->dev, "Invalid pool %d\n", pool);
return NULL;
}
@@ -738,7 +828,7 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
*/
pkts_num = new_pool->buf_num;
if (pkts_num == 0)
- pkts_num = mvpp2_pools[pool].buf_num;
+ pkts_num = mvpp2_pools[type].buf_num;
else
mvpp2_bm_bufs_free(port->dev->dev.parent,
port->priv, new_pool, pkts_num);
@@ -763,11 +853,11 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
return new_pool;
}
-/* Initialize pools for swf */
-static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
+/* Initialize pools for swf, shared buffers variant */
+static int mvpp2_swf_bm_pool_init_shared(struct mvpp2_port *port)
{
- int rxq;
enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
+ int rxq;
/* If port pkt_size is higher than 1518B:
* HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
@@ -811,6 +901,47 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
return 0;
}
+/* Initialize pools for swf, percpu buffers variant */
+static int mvpp2_swf_bm_pool_init_percpu(struct mvpp2_port *port)
+{
+ struct mvpp2_bm_pool *p;
+ int i;
+
+ for (i = 0; i < port->nrxqs; i++) {
+ p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_SHORT, i,
+ mvpp2_pools[MVPP2_BM_SHORT].pkt_size);
+ if (!p)
+ return -ENOMEM;
+
+ port->priv->bm_pools[i].port_map |= BIT(port->id);
+ mvpp2_rxq_short_pool_set(port, i, port->priv->bm_pools[i].id);
+ }
+
+ for (i = 0; i < port->nrxqs; i++) {
+ p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_LONG, i + port->nrxqs,
+ mvpp2_pools[MVPP2_BM_LONG].pkt_size);
+ if (!p)
+ return -ENOMEM;
+
+ port->priv->bm_pools[i + port->nrxqs].port_map |= BIT(port->id);
+ mvpp2_rxq_long_pool_set(port, i,
+ port->priv->bm_pools[i + port->nrxqs].id);
+ }
+
+ port->pool_long = NULL;
+ port->pool_short = NULL;
+
+ return 0;
+}
+
+static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
+{
+ if (port->priv->percpu_pools)
+ return mvpp2_swf_bm_pool_init_percpu(port);
+ else
+ return mvpp2_swf_bm_pool_init_shared(port);
+}
+
static void mvpp2_set_hw_csum(struct mvpp2_port *port,
enum mvpp2_bm_pool_log_num new_long_pool)
{
@@ -837,6 +968,9 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
enum mvpp2_bm_pool_log_num new_long_pool;
int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
+ if (port->priv->percpu_pools)
+ goto out_set;
+
/* If port MTU is higher than 1518B:
* HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
* else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
@@ -866,6 +1000,7 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
mvpp2_set_hw_csum(port, new_long_pool);
}
+out_set:
dev->mtu = mtu;
dev->wanted_features = dev->features;
@@ -3699,10 +3834,48 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p)
return err;
}
+/* Shut down all the ports, reconfigure the pools as percpu or shared,
+ * then bring up again all ports.
+ */
+static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu)
+{
+ int numbufs = MVPP2_BM_POOLS_NUM, i;
+ struct mvpp2_port *port = NULL;
+ bool status[MVPP2_MAX_PORTS];
+
+ for (i = 0; i < priv->port_count; i++) {
+ port = priv->port_list[i];
+ status[i] = netif_running(port->dev);
+ if (status[i])
+ mvpp2_stop(port->dev);
+ }
+
+ /* nrxqs is the same for all ports */
+ if (priv->percpu_pools)
+ numbufs = port->nrxqs * 2;
+
+ for (i = 0; i < numbufs; i++)
+ mvpp2_bm_pool_destroy(port->dev->dev.parent, priv, &priv->bm_pools[i]);
+
+ devm_kfree(port->dev->dev.parent, priv->bm_pools);
+ priv->percpu_pools = percpu;
+ mvpp2_bm_init(port->dev->dev.parent, priv);
+
+ for (i = 0; i < priv->port_count; i++) {
+ port = priv->port_list[i];
+ mvpp2_swf_bm_pool_init(port);
+ if (status[i])
+ mvpp2_open(port->dev);
+ }
+
+ return 0;
+}
+
static int mvpp2_change_mtu(struct net_device *dev, int mtu)
{
struct mvpp2_port *port = netdev_priv(dev);
bool running = netif_running(dev);
+ struct mvpp2 *priv = port->priv;
int err;
if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) {
@@ -3711,6 +3884,31 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu)
mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
}
+ if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
+ if (priv->percpu_pools) {
+ netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu);
+ mvpp2_bm_switch_buffers(priv, false);
+ }
+ } else {
+ bool jumbo = false;
+ int i;
+
+ for (i = 0; i < priv->port_count; i++)
+ if (priv->port_list[i] != port &&
+ MVPP2_RX_PKT_SIZE(priv->port_list[i]->dev->mtu) >
+ MVPP2_BM_LONG_PKT_SIZE) {
+ jumbo = true;
+ break;
+ }
+
+ /* No port is using jumbo frames */
+ if (!jumbo) {
+ dev_info(port->dev->dev.parent,
+ "all ports have a low MTU, switching to per-cpu buffers");
+ mvpp2_bm_switch_buffers(priv, true);
+ }
+ }
+
if (running)
mvpp2_stop_dev(port);
@@ -5017,18 +5215,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
}
ntxqs = MVPP2_MAX_TXQ;
- if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE) {
- nrxqs = 1;
- } else {
- /* According to the PPv2.2 datasheet and our experiments on
- * PPv2.1, RX queues have an allocation granularity of 4 (when
- * more than a single one on PPv2.2).
- * Round up to nearest multiple of 4.
- */
- nrxqs = (num_possible_cpus() + 3) & ~0x3;
- if (nrxqs > MVPP2_PORT_MAX_RXQ)
- nrxqs = MVPP2_PORT_MAX_RXQ;
- }
+ nrxqs = mvpp2_get_nrxqs(priv);
dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs);
if (!dev)
@@ -5190,7 +5377,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
dev->features |= NETIF_F_NTUPLE;
}
- mvpp2_set_hw_csum(port, port->pool_long->id);
+ if (!port->priv->percpu_pools)
+ mvpp2_set_hw_csum(port, port->pool_long->id);
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
@@ -5482,7 +5670,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1);
/* Buffer Manager initialization */
- err = mvpp2_bm_init(pdev, priv);
+ err = mvpp2_bm_init(&pdev->dev, priv);
if (err < 0)
return err;
@@ -5568,6 +5756,10 @@ static int mvpp2_probe(struct platform_device *pdev)
priv->sysctrl_base = NULL;
}
+ if (priv->hw_version == MVPP22 &&
+ mvpp2_get_nrxqs(priv) * 2 <= MVPP2_BM_MAX_POOLS)
+ priv->percpu_pools = 1;
+
mvpp2_setup_bm_pool();
@@ -5749,7 +5941,7 @@ static int mvpp2_remove(struct platform_device *pdev)
for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i];
- mvpp2_bm_pool_destroy(pdev, priv, bm_pool);
+ mvpp2_bm_pool_destroy(&pdev->dev, priv, bm_pool);
}
for (i = 0; i < MVPP2_MAX_THREADS; i++) {