summary | refs | log | tree | commit | diff | stats
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r-- net/xdp/xsk.c 199
1 files changed, 157 insertions, 42 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cce0e4f8a536..ddca4bf1cfc8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -21,6 +21,7 @@
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
+#include <linux/rculist.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>
@@ -36,45 +37,74 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
- return !!xs->rx;
+ return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
+ READ_ONCE(xs->umem->fq);
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
- u32 *id, len = xdp->data_end - xdp->data;
- void *buffer;
- int err = 0;
+ return xskq_peek_addr(umem->fq, addr);
+}
+EXPORT_SYMBOL(xsk_umem_peek_addr);
- if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
- return -EINVAL;
+void xsk_umem_discard_addr(struct xdp_umem *umem)
+{
+ xskq_discard_addr(umem->fq);
+}
+EXPORT_SYMBOL(xsk_umem_discard_addr);
+
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+{
+ void *buffer;
+ u64 addr;
+ int err;
- id = xskq_peek_id(xs->umem->fq);
- if (!id)
+ if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ len > xs->umem->chunk_size_nohr) {
+ xs->rx_dropped++;
return -ENOSPC;
+ }
+
+ addr += xs->umem->headroom;
- buffer = xdp_umem_get_data_with_headroom(xs->umem, *id);
+ buffer = xdp_umem_get_data(xs->umem, addr);
memcpy(buffer, xdp->data, len);
- err = xskq_produce_batch_desc(xs->rx, *id, len,
- xs->umem->frame_headroom);
- if (!err)
- xskq_discard_id(xs->umem->fq);
+ err = xskq_produce_batch_desc(xs->rx, addr, len);
+ if (!err) {
+ xskq_discard_addr(xs->umem->fq);
+ xdp_return_buff(xdp);
+ return 0;
+ }
+ xs->rx_dropped++;
return err;
}
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- int err;
+ int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);
- err = __xsk_rcv(xs, xdp);
- if (likely(!err))
+ if (err) {
xdp_return_buff(xdp);
- else
xs->rx_dropped++;
+ }
return err;
}
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ u32 len;
+
+ if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+ return -EINVAL;
+
+ len = xdp->data_end - xdp->data;
+
+ return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
+ __xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+}
+
void xsk_flush(struct xdp_sock *xs)
{
xskq_produce_flush_desc(xs->rx);
@@ -83,23 +113,91 @@ void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp->data_end - xdp->data;
+ void *buffer;
+ u64 addr;
int err;
- err = __xsk_rcv(xs, xdp);
- if (!err)
- xsk_flush(xs);
- else
+ if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ len > xs->umem->chunk_size_nohr) {
xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
+ addr += xs->umem->headroom;
+
+ buffer = xdp_umem_get_data(xs->umem, addr);
+ memcpy(buffer, xdp->data, len);
+ err = xskq_produce_batch_desc(xs->rx, addr, len);
+ if (!err) {
+ xskq_discard_addr(xs->umem->fq);
+ xsk_flush(xs);
+ return 0;
+ }
+ xs->rx_dropped++;
return err;
}
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+ xskq_produce_flush_addr_n(umem->cq, nb_entries);
+}
+EXPORT_SYMBOL(xsk_umem_complete_tx);
+
+void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ xs->sk.sk_write_space(&xs->sk);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+{
+ struct xdp_desc desc;
+ struct xdp_sock *xs;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ if (!xskq_peek_desc(xs->tx, &desc))
+ continue;
+
+ if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+ goto out;
+
+ *dma = xdp_umem_get_dma(umem, desc.addr);
+ *len = desc.len;
+
+ xskq_discard_desc(xs->tx);
+ rcu_read_unlock();
+ return true;
+ }
+
+out:
+ rcu_read_unlock();
+ return false;
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx);
+
+static int xsk_zc_xmit(struct sock *sk)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+ struct net_device *dev = xs->dev;
+
+ return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+}
+
static void xsk_destruct_skb(struct sk_buff *skb)
{
- u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg;
+ u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
- WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id));
+ WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
sock_wfree(skb);
}
@@ -107,7 +205,6 @@ static void xsk_destruct_skb(struct sk_buff *skb)
static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
size_t total_len)
{
- bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
u32 max_batch = TX_BATCH_SIZE;
struct xdp_sock *xs = xdp_sk(sk);
bool sent_frame = false;
@@ -117,21 +214,20 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
if (unlikely(!xs->tx))
return -ENOBUFS;
- if (need_wait)
- return -EOPNOTSUPP;
mutex_lock(&xs->mutex);
while (xskq_peek_desc(xs->tx, &desc)) {
char *buffer;
- u32 id, len;
+ u64 addr;
+ u32 len;
if (max_batch-- == 0) {
err = -EAGAIN;
goto out;
}
- if (xskq_reserve_id(xs->umem->cq)) {
+ if (xskq_reserve_addr(xs->umem->cq)) {
err = -EAGAIN;
goto out;
}
@@ -147,15 +243,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
- skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+ skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
goto out;
}
skb_put(skb, len);
- id = desc.idx;
- buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
+ addr = desc.addr;
+ buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err)) {
kfree_skb(skb);
@@ -165,7 +261,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)id;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
@@ -190,6 +286,7 @@ out:
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
+ bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
@@ -197,8 +294,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
+ if (need_wait)
+ return -EOPNOTSUPP;
- return xsk_generic_xmit(sk, m, total_len);
+ return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
}
static unsigned int xsk_poll(struct file *file, struct socket *sock,
@@ -288,6 +387,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev;
+ u32 flags, qid;
int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp))
@@ -312,16 +412,26 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
}
- if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
- (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
+ qid = sxdp->sxdp_queue_id;
+
+ if ((xs->rx && qid >= dev->real_num_rx_queues) ||
+ (xs->tx && qid >= dev->real_num_tx_queues)) {
err = -EINVAL;
goto out_unlock;
}
- if (sxdp->sxdp_flags & XDP_SHARED_UMEM) {
+ flags = sxdp->sxdp_flags;
+
+ if (flags & XDP_SHARED_UMEM) {
struct xdp_sock *umem_xs;
struct socket *sock;
+ if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+ /* Cannot specify flags for shared sockets. */
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (xs->umem) {
/* We have already our own. */
err = -EINVAL;
@@ -340,8 +450,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = -EBADF;
sockfd_put(sock);
goto out_unlock;
- } else if (umem_xs->dev != dev ||
- umem_xs->queue_id != sxdp->sxdp_queue_id) {
+ } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
err = -EINVAL;
sockfd_put(sock);
goto out_unlock;
@@ -357,13 +466,18 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
/* This xsk has its own umem. */
xskq_set_umem(xs->umem->fq, &xs->umem->props);
xskq_set_umem(xs->umem->cq, &xs->umem->props);
+
+ err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
+ if (err)
+ goto out_unlock;
}
xs->dev = dev;
- xs->queue_id = sxdp->sxdp_queue_id;
-
+ xs->zc = xs->umem->zc;
+ xs->queue_id = qid;
xskq_set_umem(xs->rx, &xs->umem->props);
xskq_set_umem(xs->tx, &xs->umem->props);
+ xdp_add_sk_umem(xs->umem, xs);
out_unlock:
if (err)
@@ -601,6 +715,7 @@ static void xsk_destruct(struct sock *sk)
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
+ xdp_del_sk_umem(xs->umem, xs);
xdp_put_umem(xs->umem);
sk_refcnt_debug_dec(sk);