summaryrefslogtreecommitdiffstats
path: root/net/xdp
diff options
context:
space:
mode:
Diffstat (limited to 'net/xdp')
-rw-r--r--net/xdp/xdp_umem.c151
-rw-r--r--net/xdp/xdp_umem.h45
-rw-r--r--net/xdp/xdp_umem_props.h4
-rw-r--r--net/xdp/xsk.c199
-rw-r--r--net/xdp/xsk_queue.c2
-rw-r--r--net/xdp/xsk_queue.h98
6 files changed, 355 insertions, 144 deletions
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 87998818116f..7eb4948a38d2 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -13,8 +13,107 @@
#include <linux/mm.h>
#include "xdp_umem.h"
+#include "xsk_queue.h"
-#define XDP_UMEM_MIN_FRAME_SIZE 2048
+#define XDP_UMEM_MIN_CHUNK_SIZE 2048
+
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&umem->xsk_list_lock, flags);
+ list_add_rcu(&xs->list, &umem->xsk_list);
+ spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+}
+
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+ unsigned long flags;
+
+ if (xs->dev) {
+ spin_lock_irqsave(&umem->xsk_list_lock, flags);
+ list_del_rcu(&xs->list);
+ spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+
+ if (umem->zc)
+ synchronize_net();
+ }
+}
+
+int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
+ u32 queue_id, u16 flags)
+{
+ bool force_zc, force_copy;
+ struct netdev_bpf bpf;
+ int err;
+
+ force_zc = flags & XDP_ZEROCOPY;
+ force_copy = flags & XDP_COPY;
+
+ if (force_zc && force_copy)
+ return -EINVAL;
+
+ if (force_copy)
+ return 0;
+
+ dev_hold(dev);
+
+ if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
+ bpf.command = XDP_QUERY_XSK_UMEM;
+
+ rtnl_lock();
+ err = dev->netdev_ops->ndo_bpf(dev, &bpf);
+ rtnl_unlock();
+
+ if (err) {
+ dev_put(dev);
+ return force_zc ? -ENOTSUPP : 0;
+ }
+
+ bpf.command = XDP_SETUP_XSK_UMEM;
+ bpf.xsk.umem = umem;
+ bpf.xsk.queue_id = queue_id;
+
+ rtnl_lock();
+ err = dev->netdev_ops->ndo_bpf(dev, &bpf);
+ rtnl_unlock();
+
+ if (err) {
+ dev_put(dev);
+ return force_zc ? err : 0; /* fail or fallback */
+ }
+
+ umem->dev = dev;
+ umem->queue_id = queue_id;
+ umem->zc = true;
+ return 0;
+ }
+
+ dev_put(dev);
+ return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
+}
+
+static void xdp_umem_clear_dev(struct xdp_umem *umem)
+{
+ struct netdev_bpf bpf;
+ int err;
+
+ if (umem->dev) {
+ bpf.command = XDP_SETUP_XSK_UMEM;
+ bpf.xsk.umem = NULL;
+ bpf.xsk.queue_id = umem->queue_id;
+
+ rtnl_lock();
+ err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
+ rtnl_unlock();
+
+ if (err)
+ WARN(1, "failed to disable umem!\n");
+
+ dev_put(umem->dev);
+ umem->dev = NULL;
+ }
+}
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
@@ -42,6 +141,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
struct task_struct *task;
struct mm_struct *mm;
+ xdp_umem_clear_dev(umem);
+
if (umem->fq) {
xskq_destroy(umem->fq);
umem->fq = NULL;
@@ -64,6 +165,9 @@ static void xdp_umem_release(struct xdp_umem *umem)
goto out;
mmput(mm);
+ kfree(umem->pages);
+ umem->pages = NULL;
+
xdp_umem_unaccount_pages(umem);
out:
kfree(umem);
@@ -151,12 +255,12 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
- u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
+ u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
+ unsigned int chunks, chunks_per_page;
u64 addr = mr->addr, size = mr->len;
- unsigned int nframes, nfpp;
- int size_chk, err;
+ int size_chk, err, i;
- if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
+ if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
* - huge pages, or*
* - using an IOMMU, or
@@ -166,7 +270,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (!is_power_of_2(frame_size))
+ if (!is_power_of_2(chunk_size))
return -EINVAL;
if (!PAGE_ALIGNED(addr)) {
@@ -179,33 +283,32 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if ((addr + size) < addr)
return -EINVAL;
- nframes = (unsigned int)div_u64(size, frame_size);
- if (nframes == 0 || nframes > UINT_MAX)
+ chunks = (unsigned int)div_u64(size, chunk_size);
+ if (chunks == 0)
return -EINVAL;
- nfpp = PAGE_SIZE / frame_size;
- if (nframes < nfpp || nframes % nfpp)
+ chunks_per_page = PAGE_SIZE / chunk_size;
+ if (chunks < chunks_per_page || chunks % chunks_per_page)
return -EINVAL;
- frame_headroom = ALIGN(frame_headroom, 64);
+ headroom = ALIGN(headroom, 64);
- size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM;
+ size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
if (size_chk < 0)
return -EINVAL;
umem->pid = get_task_pid(current, PIDTYPE_PID);
- umem->size = (size_t)size;
umem->address = (unsigned long)addr;
- umem->props.frame_size = frame_size;
- umem->props.nframes = nframes;
- umem->frame_headroom = frame_headroom;
+ umem->props.chunk_mask = ~((u64)chunk_size - 1);
+ umem->props.size = size;
+ umem->headroom = headroom;
+ umem->chunk_size_nohr = chunk_size - headroom;
umem->npgs = size / PAGE_SIZE;
umem->pgs = NULL;
umem->user = NULL;
+ INIT_LIST_HEAD(&umem->xsk_list);
+ spin_lock_init(&umem->xsk_list_lock);
- umem->frame_size_log2 = ilog2(frame_size);
- umem->nfpp_mask = nfpp - 1;
- umem->nfpplog2 = ilog2(nfpp);
refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
@@ -215,6 +318,16 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
err = xdp_umem_pin_pages(umem);
if (err)
goto out_account;
+
+ umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
+ if (!umem->pages) {
+ err = -ENOMEM;
+ goto out_account;
+ }
+
+ for (i = 0; i < umem->npgs; i++)
+ umem->pages[i].addr = page_address(umem->pgs[i]);
+
return 0;
out_account:
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 0881cf456230..f11560334f88 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -6,52 +6,25 @@
#ifndef XDP_UMEM_H_
#define XDP_UMEM_H_
-#include <linux/mm.h>
-#include <linux/if_xdp.h>
-#include <linux/workqueue.h>
+#include <net/xdp_sock.h>
-#include "xsk_queue.h"
-#include "xdp_umem_props.h"
-
-struct xdp_umem {
- struct xsk_queue *fq;
- struct xsk_queue *cq;
- struct page **pgs;
- struct xdp_umem_props props;
- u32 npgs;
- u32 frame_headroom;
- u32 nfpp_mask;
- u32 nfpplog2;
- u32 frame_size_log2;
- struct user_struct *user;
- struct pid *pid;
- unsigned long address;
- size_t size;
- refcount_t users;
- struct work_struct work;
-};
-
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx)
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
- u64 pg, off;
- char *data;
-
- pg = idx >> umem->nfpplog2;
- off = (idx & umem->nfpp_mask) << umem->frame_size_log2;
-
- data = page_address(umem->pgs[pg]);
- return data + off;
+ return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
}
-static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
- u32 idx)
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
- return xdp_umem_get_data(umem, idx) + umem->frame_headroom;
+ return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
}
+int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
+ u32 queue_id, u16 flags);
bool xdp_umem_validate_queues(struct xdp_umem *umem);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
index 2cf8ec485fd2..40eab10dfc49 100644
--- a/net/xdp/xdp_umem_props.h
+++ b/net/xdp/xdp_umem_props.h
@@ -7,8 +7,8 @@
#define XDP_UMEM_PROPS_H_
struct xdp_umem_props {
- u32 frame_size;
- u32 nframes;
+ u64 chunk_mask;
+ u64 size;
};
#endif /* XDP_UMEM_PROPS_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cce0e4f8a536..ddca4bf1cfc8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -21,6 +21,7 @@
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
+#include <linux/rculist.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>
@@ -36,45 +37,74 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
- return !!xs->rx;
+ return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
+ READ_ONCE(xs->umem->fq);
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
- u32 *id, len = xdp->data_end - xdp->data;
- void *buffer;
- int err = 0;
+ return xskq_peek_addr(umem->fq, addr);
+}
+EXPORT_SYMBOL(xsk_umem_peek_addr);
- if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
- return -EINVAL;
+void xsk_umem_discard_addr(struct xdp_umem *umem)
+{
+ xskq_discard_addr(umem->fq);
+}
+EXPORT_SYMBOL(xsk_umem_discard_addr);
+
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+{
+ void *buffer;
+ u64 addr;
+ int err;
- id = xskq_peek_id(xs->umem->fq);
- if (!id)
+ if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ len > xs->umem->chunk_size_nohr) {
+ xs->rx_dropped++;
return -ENOSPC;
+ }
+
+ addr += xs->umem->headroom;
- buffer = xdp_umem_get_data_with_headroom(xs->umem, *id);
+ buffer = xdp_umem_get_data(xs->umem, addr);
memcpy(buffer, xdp->data, len);
- err = xskq_produce_batch_desc(xs->rx, *id, len,
- xs->umem->frame_headroom);
- if (!err)
- xskq_discard_id(xs->umem->fq);
+ err = xskq_produce_batch_desc(xs->rx, addr, len);
+ if (!err) {
+ xskq_discard_addr(xs->umem->fq);
+ xdp_return_buff(xdp);
+ return 0;
+ }
+ xs->rx_dropped++;
return err;
}
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- int err;
+ int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);
- err = __xsk_rcv(xs, xdp);
- if (likely(!err))
+ if (err) {
xdp_return_buff(xdp);
- else
xs->rx_dropped++;
+ }
return err;
}
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ u32 len;
+
+ if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+ return -EINVAL;
+
+ len = xdp->data_end - xdp->data;
+
+ return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
+ __xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+}
+
void xsk_flush(struct xdp_sock *xs)
{
xskq_produce_flush_desc(xs->rx);
@@ -83,23 +113,91 @@ void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp->data_end - xdp->data;
+ void *buffer;
+ u64 addr;
int err;
- err = __xsk_rcv(xs, xdp);
- if (!err)
- xsk_flush(xs);
- else
+ if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ len > xs->umem->chunk_size_nohr) {
xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
+ addr += xs->umem->headroom;
+
+ buffer = xdp_umem_get_data(xs->umem, addr);
+ memcpy(buffer, xdp->data, len);
+ err = xskq_produce_batch_desc(xs->rx, addr, len);
+ if (!err) {
+ xskq_discard_addr(xs->umem->fq);
+ xsk_flush(xs);
+ return 0;
+ }
+ xs->rx_dropped++;
return err;
}
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+ xskq_produce_flush_addr_n(umem->cq, nb_entries);
+}
+EXPORT_SYMBOL(xsk_umem_complete_tx);
+
+void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ xs->sk.sk_write_space(&xs->sk);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+{
+ struct xdp_desc desc;
+ struct xdp_sock *xs;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ if (!xskq_peek_desc(xs->tx, &desc))
+ continue;
+
+ if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+ goto out;
+
+ *dma = xdp_umem_get_dma(umem, desc.addr);
+ *len = desc.len;
+
+ xskq_discard_desc(xs->tx);
+ rcu_read_unlock();
+ return true;
+ }
+
+out:
+ rcu_read_unlock();
+ return false;
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx);
+
+static int xsk_zc_xmit(struct sock *sk)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+ struct net_device *dev = xs->dev;
+
+ return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+}
+
static void xsk_destruct_skb(struct sk_buff *skb)
{
- u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg;
+ u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
- WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id));
+ WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
sock_wfree(skb);
}
@@ -107,7 +205,6 @@ static void xsk_destruct_skb(struct sk_buff *skb)
static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
size_t total_len)
{
- bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
u32 max_batch = TX_BATCH_SIZE;
struct xdp_sock *xs = xdp_sk(sk);
bool sent_frame = false;
@@ -117,21 +214,20 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
if (unlikely(!xs->tx))
return -ENOBUFS;
- if (need_wait)
- return -EOPNOTSUPP;
mutex_lock(&xs->mutex);
while (xskq_peek_desc(xs->tx, &desc)) {
char *buffer;
- u32 id, len;
+ u64 addr;
+ u32 len;
if (max_batch-- == 0) {
err = -EAGAIN;
goto out;
}
- if (xskq_reserve_id(xs->umem->cq)) {
+ if (xskq_reserve_addr(xs->umem->cq)) {
err = -EAGAIN;
goto out;
}
@@ -147,15 +243,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
- skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+ skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
goto out;
}
skb_put(skb, len);
- id = desc.idx;
- buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
+ addr = desc.addr;
+ buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err)) {
kfree_skb(skb);
@@ -165,7 +261,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)id;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
@@ -190,6 +286,7 @@ out:
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
+ bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
@@ -197,8 +294,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
+ if (need_wait)
+ return -EOPNOTSUPP;
- return xsk_generic_xmit(sk, m, total_len);
+ return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
}
static unsigned int xsk_poll(struct file *file, struct socket *sock,
@@ -288,6 +387,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev;
+ u32 flags, qid;
int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp))
@@ -312,16 +412,26 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
}
- if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
- (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
+ qid = sxdp->sxdp_queue_id;
+
+ if ((xs->rx && qid >= dev->real_num_rx_queues) ||
+ (xs->tx && qid >= dev->real_num_tx_queues)) {
err = -EINVAL;
goto out_unlock;
}
- if (sxdp->sxdp_flags & XDP_SHARED_UMEM) {
+ flags = sxdp->sxdp_flags;
+
+ if (flags & XDP_SHARED_UMEM) {
struct xdp_sock *umem_xs;
struct socket *sock;
+ if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+ /* Cannot specify flags for shared sockets. */
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (xs->umem) {
/* We have already our own. */
err = -EINVAL;
@@ -340,8 +450,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = -EBADF;
sockfd_put(sock);
goto out_unlock;
- } else if (umem_xs->dev != dev ||
- umem_xs->queue_id != sxdp->sxdp_queue_id) {
+ } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
err = -EINVAL;
sockfd_put(sock);
goto out_unlock;
@@ -357,13 +466,18 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
/* This xsk has its own umem. */
xskq_set_umem(xs->umem->fq, &xs->umem->props);
xskq_set_umem(xs->umem->cq, &xs->umem->props);
+
+ err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
+ if (err)
+ goto out_unlock;
}
xs->dev = dev;
- xs->queue_id = sxdp->sxdp_queue_id;
-
+ xs->zc = xs->umem->zc;
+ xs->queue_id = qid;
xskq_set_umem(xs->rx, &xs->umem->props);
xskq_set_umem(xs->tx, &xs->umem->props);
+ xdp_add_sk_umem(xs->umem, xs);
out_unlock:
if (err)
@@ -601,6 +715,7 @@ static void xsk_destruct(struct sock *sk)
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
+ xdp_del_sk_umem(xs->umem, xs);
xdp_put_umem(xs->umem);
sk_refcnt_debug_dec(sk);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index ebe85e59507e..6c32e92e98fc 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -17,7 +17,7 @@ void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props)
static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
{
- return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u32);
+ return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u64);
}
static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index cb8e5be35110..ef6a6f0ec949 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -8,10 +8,10 @@
#include <linux/types.h>
#include <linux/if_xdp.h>
-
-#include "xdp_umem_props.h"
+#include <net/xdp_sock.h>
#define RX_BATCH_SIZE 16
+#define LAZY_UPDATE_THRESHOLD 128
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
@@ -27,7 +27,7 @@ struct xdp_rxtx_ring {
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
- u32 desc[0] ____cacheline_aligned_in_smp;
+ u64 desc[0] ____cacheline_aligned_in_smp;
};
struct xsk_queue {
@@ -62,9 +62,14 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
return (entries > dcnt) ? dcnt : entries;
}
+static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
+{
+ return q->nentries - (producer - q->cons_tail);
+}
+
static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
{
- u32 free_entries = q->nentries - (producer - q->cons_tail);
+ u32 free_entries = xskq_nb_free_lazy(q, producer);
if (free_entries >= dcnt)
return free_entries;
@@ -76,23 +81,25 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
/* UMEM queue */
-static inline bool xskq_is_valid_id(struct xsk_queue *q, u32 idx)
+static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
- if (unlikely(idx >= q->umem_props.nframes)) {
+ if (addr >= q->umem_props.size) {
q->invalid_descs++;
return false;
}
+
return true;
}
-static inline u32 *xskq_validate_id(struct xsk_queue *q)
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
- if (xskq_is_valid_id(q, ring->desc[idx]))
- return &ring->desc[idx];
+ *addr = READ_ONCE(ring->desc[idx]) & q->umem_props.chunk_mask;
+ if (xskq_is_valid_addr(q, *addr))
+ return addr;
q->cons_tail++;
}
@@ -100,35 +107,32 @@ static inline u32 *xskq_validate_id(struct xsk_queue *q)
return NULL;
}
-static inline u32 *xskq_peek_id(struct xsk_queue *q)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
{
- struct xdp_umem_ring *ring;
-
if (q->cons_tail == q->cons_head) {
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
/* Order consumer and data */
smp_rmb();
-
- return xskq_validate_id(q);
}
- ring = (struct xdp_umem_ring *)q->ring;
- return &ring->desc[q->cons_tail & q->ring_mask];
+ return xskq_validate_addr(q, addr);
}
-static inline void xskq_discard_id(struct xsk_queue *q)
+static inline void xskq_discard_addr(struct xsk_queue *q)
{
q->cons_tail++;
- (void)xskq_validate_id(q);
}
-static inline int xskq_produce_id(struct xsk_queue *q, u32 id)
+static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- ring->desc[q->prod_tail++ & q->ring_mask] = id;
+ if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+ return -ENOSPC;
+
+ ring->desc[q->prod_tail++ & q->ring_mask] = addr;
/* Order producer and data */
smp_wmb();
@@ -137,7 +141,28 @@ static inline int xskq_produce_id(struct xsk_queue *q, u32 id)
return 0;
}
-static inline int xskq_reserve_id(struct xsk_queue *q)
+static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
+{
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+ if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
+ return -ENOSPC;
+
+ ring->desc[q->prod_head++ & q->ring_mask] = addr;
+ return 0;
+}
+
+static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
+ u32 nb_entries)
+{
+ /* Order producer and data */
+ smp_wmb();
+
+ q->prod_tail += nb_entries;
+ WRITE_ONCE(q->ring->producer, q->prod_tail);
+}
+
+static inline int xskq_reserve_addr(struct xsk_queue *q)
{
if (xskq_nb_free(q, q->prod_head, 1) == 0)
return -ENOSPC;
@@ -150,16 +175,11 @@ static inline int xskq_reserve_id(struct xsk_queue *q)
static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
{
- u32 buff_len;
-
- if (unlikely(d->idx >= q->umem_props.nframes)) {
- q->invalid_descs++;
+ if (!xskq_is_valid_addr(q, d->addr))
return false;
- }
- buff_len = q->umem_props.frame_size;
- if (unlikely(d->len > buff_len || d->len == 0 ||
- d->offset > buff_len || d->offset + d->len > buff_len)) {
+ if (((d->addr + d->len) & q->umem_props.chunk_mask) !=
+ (d->addr & q->umem_props.chunk_mask)) {
q->invalid_descs++;
return false;
}
@@ -174,11 +194,9 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
- if (xskq_is_valid_desc(q, &ring->desc[idx])) {
- if (desc)
- *desc = ring->desc[idx];
+ *desc = READ_ONCE(ring->desc[idx]);
+ if (xskq_is_valid_desc(q, desc))
return desc;
- }
q->cons_tail++;
}
@@ -189,31 +207,24 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
struct xdp_desc *desc)
{
- struct xdp_rxtx_ring *ring;
-
if (q->cons_tail == q->cons_head) {
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
/* Order consumer and data */
smp_rmb();
-
- return xskq_validate_desc(q, desc);
}
- ring = (struct xdp_rxtx_ring *)q->ring;
- *desc = ring->desc[q->cons_tail & q->ring_mask];
- return desc;
+ return xskq_validate_desc(q, desc);
}
static inline void xskq_discard_desc(struct xsk_queue *q)
{
q->cons_tail++;
- (void)xskq_validate_desc(q, NULL);
}
static inline int xskq_produce_batch_desc(struct xsk_queue *q,
- u32 id, u32 len, u16 offset)
+ u64 addr, u32 len)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx;
@@ -222,9 +233,8 @@ static inline int xskq_produce_batch_desc(struct xsk_queue *q,
return -ENOSPC;
idx = (q->prod_head++) & q->ring_mask;
- ring->desc[idx].idx = id;
+ ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
- ring->desc[idx].offset = offset;
return 0;
}