path: root/io_uring
Diffstat (limited to 'io_uring')
-rw-r--r--   io_uring/Makefile      2
-rw-r--r--   io_uring/io_uring.c  873
-rw-r--r--   io_uring/io_uring.h   22
-rw-r--r--   io_uring/net.c       779
-rw-r--r--   io_uring/net.h        43
5 files changed, 884 insertions(+), 835 deletions(-)
diff --git a/io_uring/Makefile b/io_uring/Makefile
index de953c022c6e..c9ec1bbabfbd 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
sync.o advise.o filetable.o \
openclose.o uring_cmd.o epoll.o \
- statx.o
+ statx.o net.o
obj-$(CONFIG_IO_WQ) += io-wq.o
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index eb01d1aadeb4..cbc20985cd6f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -102,6 +102,7 @@
#include "uring_cmd.h"
#include "epoll.h"
#include "statx.h"
+#include "net.h"
#define IORING_MAX_ENTRIES 32768
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
@@ -131,8 +132,6 @@
#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
IO_REQ_CLEAN_FLAGS)
-#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
-
#define IO_TCTX_REFS_CACHE_NR (1U << 10)
struct io_mapped_ubuf {
@@ -295,25 +294,6 @@ struct io_timeout_data {
u32 flags;
};
-struct io_accept {
- struct file *file;
- struct sockaddr __user *addr;
- int __user *addr_len;
- int flags;
- u32 file_slot;
- unsigned long nofile;
-};
-
-struct io_socket {
- struct file *file;
- int domain;
- int type;
- int protocol;
- int flags;
- u32 file_slot;
- unsigned long nofile;
-};
-
struct io_cancel {
struct file *file;
u64 addr;
@@ -350,25 +330,6 @@ struct io_rw {
rwf_t flags;
};
-struct io_connect {
- struct file *file;
- struct sockaddr __user *addr;
- int addr_len;
-};
-
-struct io_sr_msg {
- struct file *file;
- union {
- struct compat_msghdr __user *umsg_compat;
- struct user_msghdr __user *umsg;
- void __user *buf;
- };
- int msg_flags;
- size_t len;
- size_t done_io;
- unsigned int flags;
-};
-
struct io_rsrc_update {
struct file *file;
u64 arg;
@@ -385,30 +346,12 @@ struct io_provide_buf {
__u16 bid;
};
-struct io_shutdown {
- struct file *file;
- int how;
-};
-
struct io_msg {
struct file *file;
u64 user_data;
u32 len;
};
-struct io_async_connect {
- struct sockaddr_storage address;
-};
-
-struct io_async_msghdr {
- struct iovec fast_iov[UIO_FASTIOV];
- /* points to an allocated iov, if NULL we use fast_iov instead */
- struct iovec *free_iov;
- struct sockaddr __user *uaddr;
- struct msghdr msg;
- struct sockaddr_storage addr;
-};
-
struct io_rw_state {
struct iov_iter iter;
struct iov_iter_state iter_state;
@@ -517,9 +460,6 @@ static void io_req_task_queue(struct io_kiocb *req);
static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
static int io_req_prep_async(struct io_kiocb *req);
-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
- unsigned int issue_flags, u32 slot_index);
-
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static void io_eventfd_signal(struct io_ring_ctx *ctx);
static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
@@ -808,8 +748,7 @@ static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
}
-static inline unsigned int io_put_kbuf(struct io_kiocb *req,
- unsigned issue_flags)
+inline unsigned int io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
{
unsigned int cflags;
@@ -1291,12 +1230,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
spin_unlock_irq(&ctx->timeout_lock);
}
-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
-{
- /* order cqe stores with ring update */
- smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
-}
-
static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
if (ctx->off_timeout_used || ctx->drain_active) {
@@ -1418,7 +1351,7 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx)
* 1:1 relationship between how many times this function is called (and
* hence the eventfd count) and number of CQEs posted to the CQ ring.
*/
-static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
ctx->has_evfd))
@@ -1639,8 +1572,8 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
}
}
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
+ u32 cflags)
{
struct io_uring_cqe *cqe;
@@ -2980,8 +2913,8 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
return u64_to_user_ptr(buf->addr);
}
-static void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
- unsigned int issue_flags)
+void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+ unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_buffer_list *bl;
@@ -3073,13 +3006,6 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
return __io_iov_buffer_select(req, iov, issue_flags);
}
-static inline bool io_do_buffer_select(struct io_kiocb *req)
-{
- if (!(req->flags & REQ_F_BUFFER_SELECT))
- return false;
- return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
-}
-
static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
struct io_rw_state *s,
unsigned int issue_flags)
@@ -4025,755 +3951,6 @@ static __maybe_unused int io_eopnotsupp_prep(struct io_kiocb *kiocb,
return -EOPNOTSUPP;
}
-#if defined(CONFIG_NET)
-static int io_shutdown_prep(struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
-{
- struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
-
- if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
- sqe->buf_index || sqe->splice_fd_in))
- return -EINVAL;
-
- shutdown->how = READ_ONCE(sqe->len);
- return 0;
-}
-
-static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
- struct socket *sock;
- int ret;
-
- if (issue_flags & IO_URING_F_NONBLOCK)
- return -EAGAIN;
-
- sock = sock_from_file(req->file);
- if (unlikely(!sock))
- return -ENOTSOCK;
-
- ret = __sys_shutdown_sock(sock, shutdown->how);
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-
-static bool io_net_retry(struct socket *sock, int flags)
-{
- if (!(flags & MSG_WAITALL))
- return false;
- return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
-}
-
-static int io_setup_async_msg(struct io_kiocb *req,
- struct io_async_msghdr *kmsg)
-{
- struct io_async_msghdr *async_msg = req->async_data;
-
- if (async_msg)
- return -EAGAIN;
- if (io_alloc_async_data(req)) {
- kfree(kmsg->free_iov);
- return -ENOMEM;
- }
- async_msg = req->async_data;
- req->flags |= REQ_F_NEED_CLEANUP;
- memcpy(async_msg, kmsg, sizeof(*kmsg));
- async_msg->msg.msg_name = &async_msg->addr;
- /* if were using fast_iov, set it to the new one */
- if (!async_msg->free_iov)
- async_msg->msg.msg_iter.iov = async_msg->fast_iov;
-
- return -EAGAIN;
-}
-
-static int io_sendmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-
- iomsg->msg.msg_name = &iomsg->addr;
- iomsg->free_iov = iomsg->fast_iov;
- return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
- &iomsg->free_iov);
-}
-
-static int io_sendmsg_prep_async(struct io_kiocb *req)
-{
- int ret;
-
- ret = io_sendmsg_copy_hdr(req, req->async_data);
- if (!ret)
- req->flags |= REQ_F_NEED_CLEANUP;
- return ret;
-}
-
-static void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
-{
- struct io_async_msghdr *io = req->async_data;
-
- kfree(io->free_iov);
-}
-
-static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-
- if (unlikely(sqe->file_index || sqe->addr2))
- return -EINVAL;
-
- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
- sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->ioprio);
- if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
- return -EINVAL;
- sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
- if (sr->msg_flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
-
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
- sr->msg_flags |= MSG_CMSG_COMPAT;
-#endif
- sr->done_io = 0;
- return 0;
-}
-
-static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
- struct io_async_msghdr iomsg, *kmsg;
- struct socket *sock;
- unsigned flags;
- int min_ret = 0;
- int ret;
-
- sock = sock_from_file(req->file);
- if (unlikely(!sock))
- return -ENOTSOCK;
-
- if (req_has_async_data(req)) {
- kmsg = req->async_data;
- } else {
- ret = io_sendmsg_copy_hdr(req, &iomsg);
- if (ret)
- return ret;
- kmsg = &iomsg;
- }
-
- if (!(req->flags & REQ_F_POLLED) &&
- (sr->flags & IORING_RECVSEND_POLL_FIRST))
- return io_setup_async_msg(req, kmsg);
-
- flags = sr->msg_flags;
- if (issue_flags & IO_URING_F_NONBLOCK)
- flags |= MSG_DONTWAIT;
- if (flags & MSG_WAITALL)
- min_ret = iov_iter_count(&kmsg->msg.msg_iter);
-
- ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
-
- if (ret < min_ret) {
- if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return io_setup_async_msg(req, kmsg);
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
- if (ret > 0 && io_net_retry(sock, flags)) {
- sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
- return io_setup_async_msg(req, kmsg);
- }
- req_set_fail(req);
- }
- /* fast path, check for non-NULL to avoid function call */
- if (kmsg->free_iov)
- kfree(kmsg->free_iov);
- req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret >= 0)
- ret += sr->done_io;
- else if (sr->done_io)
- ret = sr->done_io;
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-
-static int io_send(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
- struct msghdr msg;
- struct iovec iov;
- struct socket *sock;
- unsigned flags;
- int min_ret = 0;
- int ret;
-
- if (!(req->flags & REQ_F_POLLED) &&
- (sr->flags & IORING_RECVSEND_POLL_FIRST))
- return -EAGAIN;
-
- sock = sock_from_file(req->file);
- if (unlikely(!sock))
- return -ENOTSOCK;
-
- ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
- if (unlikely(ret))
- return ret;
-
- msg.msg_name = NULL;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_namelen = 0;
-
- flags = sr->msg_flags;
- if (issue_flags & IO_URING_F_NONBLOCK)
- flags |= MSG_DONTWAIT;
- if (flags & MSG_WAITALL)
- min_ret = iov_iter_count(&msg.msg_iter);
-
- msg.msg_flags = flags;
- ret = sock_sendmsg(sock, &msg);
- if (ret < min_ret) {
- if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return -EAGAIN;
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
- if (ret > 0 && io_net_retry(sock, flags)) {
- sr->len -= ret;
- sr->buf += ret;
- sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
- return -EAGAIN;
- }
- req_set_fail(req);
- }
- if (ret >= 0)
- ret += sr->done_io;
- else if (sr->done_io)
- ret = sr->done_io;
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-
-static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
- struct iovec __user *uiov;
- size_t iov_len;
- int ret;
-
- ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
- &iomsg->uaddr, &uiov, &iov_len);
- if (ret)
- return ret;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- if (iov_len > 1)
- return -EINVAL;
- if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
- return -EFAULT;
- sr->len = iomsg->fast_iov[0].iov_len;
- iomsg->free_iov = NULL;
- } else {
- iomsg->free_iov = iomsg->fast_iov;
- ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
- &iomsg->free_iov, &iomsg->msg.msg_iter,
- false);
- if (ret > 0)
- ret = 0;
- }
-
- return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
- struct compat_iovec __user *uiov;
- compat_uptr_t ptr;
- compat_size_t len;
- int ret;
-
- ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
- &ptr, &len);
- if (ret)
- return ret;
-
- uiov = compat_ptr(ptr);
- if (req->flags & REQ_F_BUFFER_SELECT) {
- compat_ssize_t clen;
-
- if (len > 1)
- return -EINVAL;
- if (!access_ok(uiov, sizeof(*uiov)))
- return -EFAULT;
- if (__get_user(clen, &uiov->iov_len))
- return -EFAULT;
- if (clen < 0)
- return -EINVAL;
- sr->len = clen;
- iomsg->free_iov = NULL;
- } else {
- iomsg->free_iov = iomsg->fast_iov;
- ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
- UIO_FASTIOV, &iomsg->free_iov,
- &iomsg->msg.msg_iter, true);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
-#endif
-
-static int io_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- iomsg->msg.msg_name = &iomsg->addr;
-
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
- return __io_compat_recvmsg_copy_hdr(req, iomsg);
-#endif
-
- return __io_recvmsg_copy_hdr(req, iomsg);
-}
-
-static int io_recvmsg_prep_async(struct io_kiocb *req)
-{
- int ret;
-
- ret = io_recvmsg_copy_hdr(req, req->async_data);
- if (!ret)
- req->flags |= REQ_F_NEED_CLEANUP;
- return ret;
-}
-
-static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-
- if (unlikely(sqe->file_index || sqe->addr2))
- return -EINVAL;
-
- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
- sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->ioprio);
- if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
- return -EINVAL;
- sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
- if (sr->msg_flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- if (sr->msg_flags & MSG_ERRQUEUE)
- req->flags |= REQ_F_CLEAR_POLLIN;
-
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
- sr->msg_flags |= MSG_CMSG_COMPAT;
-#endif
- sr->done_io = 0;
- return 0;
-}
-
-static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
- struct io_async_msghdr iomsg, *kmsg;
- struct socket *sock;
- unsigned int cflags;
- unsigned flags;
- int ret, min_ret = 0;
- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
-
- sock = sock_from_file(req->file);
- if (unlikely(!sock))
- return -ENOTSOCK;
-
- if (req_has_async_data(req)) {
- kmsg = req->async_data;
- } else {
- ret = io_recvmsg_copy_hdr(req, &iomsg);
- if (ret)
- return ret;
- kmsg = &iomsg;
- }
-
- if (!(req->flags & REQ_F_POLLED) &&
- (sr->flags & IORING_RECVSEND_POLL_FIRST))
- return io_setup_async_msg(req, kmsg);
-
- if (io_do_buffer_select(req)) {
- void __user *buf;
-
- buf = io_buffer_select(req, &sr->len, issue_flags);
- if (!buf)
- return -ENOBUFS;
- kmsg->fast_iov[0].iov_base = buf;
- kmsg->fast_iov[0].iov_len = sr->len;
- iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
- sr->len);
- }
-
- flags = sr->msg_flags;
- if (force_nonblock)
- flags |= MSG_DONTWAIT;
- if (flags & MSG_WAITALL)
- min_ret = iov_iter_count(&kmsg->msg.msg_iter);
-
- kmsg->msg.msg_get_inq = 1;
- ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
- if (ret < min_ret) {
- if (ret == -EAGAIN && force_nonblock)
- return io_setup_async_msg(req, kmsg);
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
- if (ret > 0 && io_net_retry(sock, flags)) {
- sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
- return io_setup_async_msg(req, kmsg);
- }
- req_set_fail(req);
- } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
- req_set_fail(req);
- }
-
- /* fast path, check for non-NULL to avoid function call */
- if (kmsg->free_iov)
- kfree(kmsg->free_iov);
- req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret >= 0)
- ret += sr->done_io;
- else if (sr->done_io)
- ret = sr->done_io;
- cflags = io_put_kbuf(req, issue_flags);
- if (kmsg->msg.msg_inq)
- cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- io_req_set_res(req, ret, cflags);
- return IOU_OK;
-}
-
-static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
- struct msghdr msg;
- struct socket *sock;
- struct iovec iov;
- unsigned int cflags;
- unsigned flags;
- int ret, min_ret = 0;
- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
-
- if (!(req->flags & REQ_F_POLLED) &&
- (sr->flags & IORING_RECVSEND_POLL_FIRST))
- return -EAGAIN;
-
- sock = sock_from_file(req->file);
- if (unlikely(!sock))
- return -ENOTSOCK;
-
- if (io_do_buffer_select(req)) {
- void __user *buf;
-
- buf = io_buffer_select(req, &sr->len, issue_flags);
- if (!buf)
- return -ENOBUFS;
- sr->buf = buf;
- }
-
- ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
- if (unlikely(ret))
- goto out_free;
-
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
- msg.msg_get_inq = 1;
- msg.msg_flags = 0;
- msg.msg_controllen = 0;
- msg.msg_iocb = NULL;
-
- flags = sr->msg_flags;
- if (force_nonblock)
- flags |= MSG_DONTWAIT;
- if (flags & MSG_WAITALL)
- min_ret = iov_iter_count(&msg.msg_iter);
-
- ret = sock_recvmsg(sock, &msg, flags);
- if (ret < min_ret) {
- if (ret == -EAGAIN && force_nonblock)
- return -EAGAIN;
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
- if (ret > 0 && io_net_retry(sock, flags)) {
- sr->len -= ret;
- sr->buf += ret;
- sr->done_io += ret;
- req->flags |= REQ_F_PARTIAL_IO;
- return -EAGAIN;
- }
- req_set_fail(req);
- } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
-out_free:
- req_set_fail(req);
- }
-
- if (ret >= 0)
- ret += sr->done_io;
- else if (sr->done_io)
- ret = sr->done_io;
- cflags = io_put_kbuf(req, issue_flags);
- if (msg.msg_inq)
- cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- io_req_set_res(req, ret, cflags);
- return IOU_OK;
-}
-
-static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_accept *accept = io_kiocb_to_cmd(req);
- unsigned flags;
-
- if (sqe->len || sqe->buf_index)
- return -EINVAL;
-
- accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
- accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
- accept->flags = READ_ONCE(sqe->accept_flags);
- accept->nofile = rlimit(RLIMIT_NOFILE);
- flags = READ_ONCE(sqe->ioprio);
- if (flags & ~IORING_ACCEPT_MULTISHOT)
- return -EINVAL;
-
- accept->file_slot = READ_ONCE(sqe->file_index);
- if (accept->file_slot) {
- if (accept->flags & SOCK_CLOEXEC)
- return -EINVAL;
- if (flags & IORING_ACCEPT_MULTISHOT &&
- accept->file_slot != IORING_FILE_INDEX_ALLOC)
- return -EINVAL;
- }
- if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
- if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
- accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- if (flags & IORING_ACCEPT_MULTISHOT)
- req->flags |= REQ_F_APOLL_MULTISHOT;
- return 0;
-}
-
-static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_ring_ctx *ctx = req->ctx;
- struct io_accept *accept = io_kiocb_to_cmd(req);
- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
- unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
- bool fixed = !!accept->file_slot;
- struct file *file;
- int ret, fd;
-
-retry:
- if (!fixed) {
- fd = __get_unused_fd_flags(accept->flags, accept->nofile);
- if (unlikely(fd < 0))
- return fd;
- }
- file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
- accept->flags);
- if (IS_ERR(file)) {
- if (!fixed)
- put_unused_fd(fd);
- ret = PTR_ERR(file);
- if (ret == -EAGAIN && force_nonblock) {
- /*
- * if it's multishot and polled, we don't need to
- * return EAGAIN to arm the poll infra since it
- * has already been done
- */
- if ((req->flags & IO_APOLL_MULTI_POLLED) ==
- IO_APOLL_MULTI_POLLED)
- ret = IOU_ISSUE_SKIP_COMPLETE;
- return ret;
- }
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
- req_set_fail(req);
- } else if (!fixed) {
- fd_install(fd, file);
- ret = fd;
- } else {
- ret = io_fixed_fd_install(req, issue_flags, file,
- accept->file_slot);
- }
-
- if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
- io_req_set_res(req, ret, 0);
- return IOU_OK;
- }
- if (ret >= 0) {
- bool filled;
-
- spin_lock(&ctx->completion_lock);
- filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
- IORING_CQE_F_MORE);
- io_commit_cqring(ctx);
- spin_unlock(&ctx->completion_lock);
- if (filled) {
- io_cqring_ev_posted(ctx);
- goto retry;
- }
- ret = -ECANCELED;
- }
-
- return ret;
-}
-
-static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_socket *sock = io_kiocb_to_cmd(req);
-
- if (sqe->addr || sqe->rw_flags || sqe->buf_index)
- return -EINVAL;
-
- sock->domain = READ_ONCE(sqe->fd);
- sock->type = READ_ONCE(sqe->off);
- sock->protocol = READ_ONCE(sqe->len);
- sock->file_slot = READ_ONCE(sqe->file_index);
- sock->nofile = rlimit(RLIMIT_NOFILE);
-
- sock->flags = sock->type & ~SOCK_TYPE_MASK;
- if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
- return -EINVAL;
- if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
- return 0;
-}
-
-static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_socket *sock = io_kiocb_to_cmd(req);
- bool fixed = !!sock->file_slot;
- struct file *file;
- int ret, fd;
-
- if (!fixed) {
- fd = __get_unused_fd_flags(sock->flags, sock->nofile);
- if (unlikely(fd < 0))
- return fd;
- }
- file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
- if (IS_ERR(file)) {
- if (!fixed)
- put_unused_fd(fd);
- ret = PTR_ERR(file);
- if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return -EAGAIN;
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
- req_set_fail(req);
- } else if (!fixed) {
- fd_install(fd, file);
- ret = fd;
- } else {
- ret = io_fixed_fd_install(req, issue_flags, file,
- sock->file_slot);
- }
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-
-static int io_connect_prep_async(struct io_kiocb *req)
-{
- struct io_async_connect *io = req->async_data;
- struct io_connect *conn = io_kiocb_to_cmd(req);
-
- return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
-}
-
-static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_connect *conn = io_kiocb_to_cmd(req);
-
- if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
- return -EINVAL;
-
- conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
- conn->addr_len = READ_ONCE(sqe->addr2);
- return 0;
-}
-
-static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_connect *connect = io_kiocb_to_cmd(req);
- struct io_async_connect __io, *io;
- unsigned file_flags;
- int ret;
- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
-
- if (req_has_async_data(req)) {
- io = req->async_data;
- } else {
- ret = move_addr_to_kernel(connect->addr,
- connect->addr_len,
- &__io.address);
- if (ret)
- goto out;
- io = &__io;
- }
-
- file_flags = force_nonblock ? O_NONBLOCK : 0;
-
- ret = __sys_connect_file(req->file, &io->address,
- connect->addr_len, file_flags);
- if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
- if (req_has_async_data(req))
- return -EAGAIN;
- if (io_alloc_async_data(req)) {
- ret = -ENOMEM;
- goto out;
- }
- memcpy(req->async_data, &__io, sizeof(__io));
- return -EAGAIN;
- }
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
-out:
- if (ret < 0)
- req_set_fail(req);
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-#else /* !CONFIG_NET */
-#define IO_NETOP_FN(op) \
-static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \
-{ \
- return -EOPNOTSUPP; \
-}
-
-#define IO_NETOP_PREP(op) \
-IO_NETOP_FN(op) \
-static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
-{ \
- return -EOPNOTSUPP; \
-} \
-
-#define IO_NETOP_PREP_ASYNC(op) \
-IO_NETOP_PREP(op) \
-static int io_##op##_prep_async(struct io_kiocb *req) \
-{ \
- return -EOPNOTSUPP; \
-}
-
-IO_NETOP_PREP_ASYNC(sendmsg);
-IO_NETOP_PREP_ASYNC(recvmsg);
-IO_NETOP_PREP_ASYNC(connect);
-IO_NETOP_PREP(accept);
-IO_NETOP_PREP(socket);
-IO_NETOP_PREP(shutdown);
-IO_NETOP_FN(send);
-IO_NETOP_FN(recv);
-#endif /* CONFIG_NET */
-
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
@@ -7874,8 +7051,8 @@ int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
return 0;
}
-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
- unsigned int issue_flags, u32 slot_index)
+int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index)
__must_hold(&req->ctx->uring_lock)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -10986,12 +10163,14 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.pollout = 1,
.ioprio = 1,
+#if defined(CONFIG_NET)
.async_size = sizeof(struct io_async_msghdr),
.prep = io_sendmsg_prep,
.issue = io_sendmsg,
.prep_async = io_sendmsg_prep_async,
-#if defined(CONFIG_NET)
.cleanup = io_sendmsg_recvmsg_cleanup,
+#else
+ .prep = io_eopnotsupp_prep,
#endif
},
[IORING_OP_RECVMSG] = {
@@ -11000,12 +10179,14 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.buffer_select = 1,
.ioprio = 1,
+#if defined(CONFIG_NET)
.async_size = sizeof(struct io_async_msghdr),
.prep = io_recvmsg_prep,
.issue = io_recvmsg,
.prep_async = io_recvmsg_prep_async,
-#if defined(CONFIG_NET)
.cleanup = io_sendmsg_recvmsg_cleanup,
+#else
+ .prep = io_eopnotsupp_prep,
#endif
},
[IORING_OP_TIMEOUT] = {
@@ -11026,8 +10207,12 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.poll_exclusive = 1,
.ioprio = 1, /* used for flags */
+#if defined(CONFIG_NET)
.prep = io_accept_prep,
.issue = io_accept,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
},
[IORING_OP_ASYNC_CANCEL] = {
.audit_skip = 1,
@@ -11044,10 +10229,14 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
+#if defined(CONFIG_NET)
.async_size = sizeof(struct io_async_connect),
.prep = io_connect_prep,
.issue = io_connect,
.prep_async = io_connect_prep_async,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
},
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
@@ -11117,8 +10306,12 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.audit_skip = 1,
.ioprio = 1,
+#if defined(CONFIG_NET)
.prep = io_sendmsg_prep,
.issue = io_send,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
},
[IORING_OP_RECV] = {
.needs_file = 1,
@@ -11127,8 +10320,12 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
.audit_skip = 1,
.ioprio = 1,
+#if defined(CONFIG_NET)
.prep = io_recvmsg_prep,
.issue = io_recv,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
},
[IORING_OP_OPENAT2] = {
.prep = io_openat2_prep,
@@ -11175,8 +10372,12 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_SHUTDOWN] = {
.needs_file = 1,
+#if defined(CONFIG_NET)
.prep = io_shutdown_prep,
.issue = io_shutdown,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
},
[IORING_OP_RENAMEAT] = {
.prep = io_renameat_prep,
@@ -11233,8 +10434,12 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_SOCKET] = {
.audit_skip = 1,
+#if defined(CONFIG_NET)
.prep = io_socket_prep,
.issue = io_socket,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
},
[IORING_OP_URING_CMD] = {
.needs_file = 1,
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 6a07e902120a..4b46385720c5 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -58,13 +58,35 @@ static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
lockdep_assert_held(&ctx->uring_lock);
}
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+ /* order cqe stores with ring update */
+ smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
+}
+
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
+bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
+ u32 cflags);
+void io_cqring_ev_posted(struct io_ring_ctx *ctx);
+void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+ unsigned int issue_flags);
+unsigned int io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
+
+static inline bool io_do_buffer_select(struct io_kiocb *req)
+{
+ if (!(req->flags & REQ_F_BUFFER_SELECT))
+ return false;
+ return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
+}
+
struct file *io_file_get_normal(struct io_kiocb *req, int fd);
struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct file *file, unsigned int file_slot);
+int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index);
int io_rsrc_node_switch_start(struct io_ring_ctx *ctx);
int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
diff --git a/io_uring/net.c b/io_uring/net.c
new file mode 100644
index 000000000000..2434548d0c1f
--- /dev/null
+++ b/io_uring/net.c
@@ -0,0 +1,779 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/compat.h>
+#include <net/compat.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring_types.h"
+#include "io_uring.h"
+#include "net.h"
+
+#if defined(CONFIG_NET)
+struct io_shutdown {
+ struct file *file;
+ int how;
+};
+
+struct io_accept {
+ struct file *file;
+ struct sockaddr __user *addr;
+ int __user *addr_len;
+ int flags;
+ u32 file_slot;
+ unsigned long nofile;
+};
+
+struct io_socket {
+ struct file *file;
+ int domain;
+ int type;
+ int protocol;
+ int flags;
+ u32 file_slot;
+ unsigned long nofile;
+};
+
+struct io_connect {
+ struct file *file;
+ struct sockaddr __user *addr;
+ int addr_len;
+};
+
+struct io_sr_msg {
+ struct file *file;
+ union {
+ struct compat_msghdr __user *umsg_compat;
+ struct user_msghdr __user *umsg;
+ void __user *buf;
+ };
+ int msg_flags;
+ size_t len;
+ size_t done_io;
+ unsigned int flags;
+};
+
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
+int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
+
+ if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
+ sqe->buf_index || sqe->splice_fd_in))
+ return -EINVAL;
+
+ shutdown->how = READ_ONCE(sqe->len);
+ return 0;
+}
+
+int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
+ struct socket *sock;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ ret = __sys_shutdown_sock(sock, shutdown->how);
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
+
+static bool io_net_retry(struct socket *sock, int flags)
+{
+ if (!(flags & MSG_WAITALL))
+ return false;
+ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
+static int io_setup_async_msg(struct io_kiocb *req,
+ struct io_async_msghdr *kmsg)
+{
+ struct io_async_msghdr *async_msg = req->async_data;
+
+ if (async_msg)
+ return -EAGAIN;
+ if (io_alloc_async_data(req)) {
+ kfree(kmsg->free_iov);
+ return -ENOMEM;
+ }
+ async_msg = req->async_data;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ memcpy(async_msg, kmsg, sizeof(*kmsg));
+ async_msg->msg.msg_name = &async_msg->addr;
+ /* if were using fast_iov, set it to the new one */
+ if (!async_msg->free_iov)
+ async_msg->msg.msg_iter.iov = async_msg->fast_iov;
+
+ return -EAGAIN;
+}
+
+static int io_sendmsg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+
+ iomsg->msg.msg_name = &iomsg->addr;
+ iomsg->free_iov = iomsg->fast_iov;
+ return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
+ &iomsg->free_iov);
+}
+
+int io_sendmsg_prep_async(struct io_kiocb *req)
+{
+ int ret;
+
+ ret = io_sendmsg_copy_hdr(req, req->async_data);
+ if (!ret)
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return ret;
+}
+
+void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
+{
+ struct io_async_msghdr *io = req->async_data;
+
+ kfree(io->free_iov);
+}
+
+int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+
+ if (unlikely(sqe->file_index || sqe->addr2))
+ return -EINVAL;
+
+ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ sr->len = READ_ONCE(sqe->len);
+ sr->flags = READ_ONCE(sqe->ioprio);
+ if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+ return -EINVAL;
+ sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+ if (sr->msg_flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+ sr->done_io = 0;
+ return 0;
+}
+
+int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_async_msghdr iomsg, *kmsg;
+ struct socket *sock;
+ unsigned flags;
+ int min_ret = 0;
+ int ret;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ if (req_has_async_data(req)) {
+ kmsg = req->async_data;
+ } else {
+ ret = io_sendmsg_copy_hdr(req, &iomsg);
+ if (ret)
+ return ret;
+ kmsg = &iomsg;
+ }
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg);
+
+ flags = sr->msg_flags;
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+ ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+ return io_setup_async_msg(req, kmsg);
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return io_setup_async_msg(req, kmsg);
+ }
+ req_set_fail(req);
+ }
+ /* fast path, check for non-NULL to avoid function call */
+ if (kmsg->free_iov)
+ kfree(kmsg->free_iov);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
+
+int io_send(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct msghdr msg;
+ struct iovec iov;
+ struct socket *sock;
+ unsigned flags;
+ int min_ret = 0;
+ int ret;
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return -EAGAIN;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
+ if (unlikely(ret))
+ return ret;
+
+ msg.msg_name = NULL;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_namelen = 0;
+
+ flags = sr->msg_flags;
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&msg.msg_iter);
+
+ msg.msg_flags = flags;
+ ret = sock_sendmsg(sock, &msg);
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+ return -EAGAIN;
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return -EAGAIN;
+ }
+ req_set_fail(req);
+ }
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
+
+static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct iovec __user *uiov;
+ size_t iov_len;
+ int ret;
+
+ ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
+ &iomsg->uaddr, &uiov, &iov_len);
+ if (ret)
+ return ret;
+
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ if (iov_len > 1)
+ return -EINVAL;
+ if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
+ return -EFAULT;
+ sr->len = iomsg->fast_iov[0].iov_len;
+ iomsg->free_iov = NULL;
+ } else {
+ iomsg->free_iov = iomsg->fast_iov;
+ ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
+ &iomsg->free_iov, &iomsg->msg.msg_iter,
+ false);
+ if (ret > 0)
+ ret = 0;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct compat_iovec __user *uiov;
+ compat_uptr_t ptr;
+ compat_size_t len;
+ int ret;
+
+ ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
+ &ptr, &len);
+ if (ret)
+ return ret;
+
+ uiov = compat_ptr(ptr);
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ compat_ssize_t clen;
+
+ if (len > 1)
+ return -EINVAL;
+ if (!access_ok(uiov, sizeof(*uiov)))
+ return -EFAULT;
+ if (__get_user(clen, &uiov->iov_len))
+ return -EFAULT;
+ if (clen < 0)
+ return -EINVAL;
+ sr->len = clen;
+ iomsg->free_iov = NULL;
+ } else {
+ iomsg->free_iov = iomsg->fast_iov;
+ ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
+ UIO_FASTIOV, &iomsg->free_iov,
+ &iomsg->msg.msg_iter, true);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
+static int io_recvmsg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg)
+{
+ iomsg->msg.msg_name = &iomsg->addr;
+
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ return __io_compat_recvmsg_copy_hdr(req, iomsg);
+#endif
+
+ return __io_recvmsg_copy_hdr(req, iomsg);
+}
+
+int io_recvmsg_prep_async(struct io_kiocb *req)
+{
+ int ret;
+
+ ret = io_recvmsg_copy_hdr(req, req->async_data);
+ if (!ret)
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return ret;
+}
+
+int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+
+ if (unlikely(sqe->file_index || sqe->addr2))
+ return -EINVAL;
+
+ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ sr->len = READ_ONCE(sqe->len);
+ sr->flags = READ_ONCE(sqe->ioprio);
+ if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+ return -EINVAL;
+ sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+ if (sr->msg_flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ if (sr->msg_flags & MSG_ERRQUEUE)
+ req->flags |= REQ_F_CLEAR_POLLIN;
+
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+ sr->done_io = 0;
+ return 0;
+}
+
+int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_async_msghdr iomsg, *kmsg;
+ struct socket *sock;
+ unsigned int cflags;
+ unsigned flags;
+ int ret, min_ret = 0;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ if (req_has_async_data(req)) {
+ kmsg = req->async_data;
+ } else {
+ ret = io_recvmsg_copy_hdr(req, &iomsg);
+ if (ret)
+ return ret;
+ kmsg = &iomsg;
+ }
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg);
+
+ if (io_do_buffer_select(req)) {
+ void __user *buf;
+
+ buf = io_buffer_select(req, &sr->len, issue_flags);
+ if (!buf)
+ return -ENOBUFS;
+ kmsg->fast_iov[0].iov_base = buf;
+ kmsg->fast_iov[0].iov_len = sr->len;
+ iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
+ sr->len);
+ }
+
+ flags = sr->msg_flags;
+ if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+ kmsg->msg.msg_get_inq = 1;
+ ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && force_nonblock)
+ return io_setup_async_msg(req, kmsg);
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return io_setup_async_msg(req, kmsg);
+ }
+ req_set_fail(req);
+ } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+ req_set_fail(req);
+ }
+
+ /* fast path, check for non-NULL to avoid function call */
+ if (kmsg->free_iov)
+ kfree(kmsg->free_iov);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
+ cflags = io_put_kbuf(req, issue_flags);
+ if (kmsg->msg.msg_inq)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+ io_req_set_res(req, ret, cflags);
+ return IOU_OK;
+}
+
+int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct msghdr msg;
+ struct socket *sock;
+ struct iovec iov;
+ unsigned int cflags;
+ unsigned flags;
+ int ret, min_ret = 0;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return -EAGAIN;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ if (io_do_buffer_select(req)) {
+ void __user *buf;
+
+ buf = io_buffer_select(req, &sr->len, issue_flags);
+ if (!buf)
+ return -ENOBUFS;
+ sr->buf = buf;
+ }
+
+ ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
+ if (unlikely(ret))
+ goto out_free;
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = NULL;
+ msg.msg_get_inq = 1;
+ msg.msg_flags = 0;
+ msg.msg_controllen = 0;
+ msg.msg_iocb = NULL;
+
+ flags = sr->msg_flags;
+ if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&msg.msg_iter);
+
+ ret = sock_recvmsg(sock, &msg, flags);
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && force_nonblock)
+ return -EAGAIN;
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return -EAGAIN;
+ }
+ req_set_fail(req);
+ } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+out_free:
+ req_set_fail(req);
+ }
+
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
+ cflags = io_put_kbuf(req, issue_flags);
+ if (msg.msg_inq)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+ io_req_set_res(req, ret, cflags);
+ return IOU_OK;
+}
+
+int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_accept *accept = io_kiocb_to_cmd(req);
+ unsigned flags;
+
+ if (sqe->len || sqe->buf_index)
+ return -EINVAL;
+
+ accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ accept->flags = READ_ONCE(sqe->accept_flags);
+ accept->nofile = rlimit(RLIMIT_NOFILE);
+ flags = READ_ONCE(sqe->ioprio);
+ if (flags & ~IORING_ACCEPT_MULTISHOT)
+ return -EINVAL;
+
+ accept->file_slot = READ_ONCE(sqe->file_index);
+ if (accept->file_slot) {
+ if (accept->flags & SOCK_CLOEXEC)
+ return -EINVAL;
+ if (flags & IORING_ACCEPT_MULTISHOT &&
+ accept->file_slot != IORING_FILE_INDEX_ALLOC)
+ return -EINVAL;
+ }
+ if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
+ accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+ if (flags & IORING_ACCEPT_MULTISHOT)
+ req->flags |= REQ_F_APOLL_MULTISHOT;
+ return 0;
+}
+
+int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_accept *accept = io_kiocb_to_cmd(req);
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
+ bool fixed = !!accept->file_slot;
+ struct file *file;
+ int ret, fd;
+
+retry:
+ if (!fixed) {
+ fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+ if (unlikely(fd < 0))
+ return fd;
+ }
+ file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
+ accept->flags);
+ if (IS_ERR(file)) {
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
+ if (ret == -EAGAIN && force_nonblock) {
+ /*
+ * if it's multishot and polled, we don't need to
+ * return EAGAIN to arm the poll infra since it
+ * has already been done
+ */
+ if ((req->flags & IO_APOLL_MULTI_POLLED) ==
+ IO_APOLL_MULTI_POLLED)
+ ret = IOU_ISSUE_SKIP_COMPLETE;
+ return ret;
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+ } else if (!fixed) {
+ fd_install(fd, file);
+ ret = fd;
+ } else {
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ accept->file_slot);
+ }
+
+ if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+ }
+ if (ret >= 0) {
+ bool filled;
+
+ spin_lock(&ctx->completion_lock);
+ filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
+ IORING_CQE_F_MORE);
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ if (filled) {
+ io_cqring_ev_posted(ctx);
+ goto retry;
+ }
+ ret = -ECANCELED;
+ }
+
+ return ret;
+}
+
+int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_socket *sock = io_kiocb_to_cmd(req);
+
+ if (sqe->addr || sqe->rw_flags || sqe->buf_index)
+ return -EINVAL;
+
+ sock->domain = READ_ONCE(sqe->fd);
+ sock->type = READ_ONCE(sqe->off);
+ sock->protocol = READ_ONCE(sqe->len);
+ sock->file_slot = READ_ONCE(sqe->file_index);
+ sock->nofile = rlimit(RLIMIT_NOFILE);
+
+ sock->flags = sock->type & ~SOCK_TYPE_MASK;
+ if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
+ return -EINVAL;
+ if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ return 0;
+}
+
+int io_socket(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_socket *sock = io_kiocb_to_cmd(req);
+ bool fixed = !!sock->file_slot;
+ struct file *file;
+ int ret, fd;
+
+ if (!fixed) {
+ fd = __get_unused_fd_flags(sock->flags, sock->nofile);
+ if (unlikely(fd < 0))
+ return fd;
+ }
+ file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
+ if (IS_ERR(file)) {
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+ return -EAGAIN;
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+ } else if (!fixed) {
+ fd_install(fd, file);
+ ret = fd;
+ } else {
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ sock->file_slot);
+ }
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
+
+int io_connect_prep_async(struct io_kiocb *req)
+{
+ struct io_async_connect *io = req->async_data;
+ struct io_connect *conn = io_kiocb_to_cmd(req);
+
+ return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
+}
+
+int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_connect *conn = io_kiocb_to_cmd(req);
+
+ if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
+ return -EINVAL;
+
+ conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ conn->addr_len = READ_ONCE(sqe->addr2);
+ return 0;
+}
+
+int io_connect(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_connect *connect = io_kiocb_to_cmd(req);
+ struct io_async_connect __io, *io;
+ unsigned file_flags;
+ int ret;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+
+ if (req_has_async_data(req)) {
+ io = req->async_data;
+ } else {
+ ret = move_addr_to_kernel(connect->addr,
+ connect->addr_len,
+ &__io.address);
+ if (ret)
+ goto out;
+ io = &__io;
+ }
+
+ file_flags = force_nonblock ? O_NONBLOCK : 0;
+
+ ret = __sys_connect_file(req->file, &io->address,
+ connect->addr_len, file_flags);
+ if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
+ if (req_has_async_data(req))
+ return -EAGAIN;
+ if (io_alloc_async_data(req)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memcpy(req->async_data, &__io, sizeof(__io));
+ return -EAGAIN;
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+out:
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
+#endif
diff --git a/io_uring/net.h b/io_uring/net.h
new file mode 100644
index 000000000000..81d71d164770
--- /dev/null
+++ b/io_uring/net.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/net.h>
+#include <linux/uio.h>
+
+#if defined(CONFIG_NET)
+struct io_async_msghdr {
+ struct iovec fast_iov[UIO_FASTIOV];
+ /* points to an allocated iov, if NULL we use fast_iov instead */
+ struct iovec *free_iov;
+ struct sockaddr __user *uaddr;
+ struct msghdr msg;
+ struct sockaddr_storage addr;
+};
+
+struct io_async_connect {
+ struct sockaddr_storage address;
+};
+
+int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_shutdown(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_sendmsg_prep_async(struct io_kiocb *req);
+void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req);
+int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags);
+int io_send(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_recvmsg_prep_async(struct io_kiocb *req);
+int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags);
+int io_recv(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_accept(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_socket(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_connect_prep_async(struct io_kiocb *req);
+int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_connect(struct io_kiocb *req, unsigned int issue_flags);
+#endif
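
For context, a minimal userspace sketch (not part of this patch) of how the net opcodes gated behind CONFIG_NET here are typically driven via liburing. The helper name send_with_io_uring, the socket fd sockfd, and the queue depth are illustrative assumptions; error handling is trimmed for brevity.

/* Illustrative only -- not part of this commit. Assumes liburing and an
 * already-connected stream socket in `sockfd`.
 */
#include <liburing.h>
#include <stddef.h>

static int send_with_io_uring(int sockfd, const char *buf, size_t len)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return ret;

	/* Queue an IORING_OP_SEND; on the kernel side this is prepared by
	 * io_sendmsg_prep() and issued by io_send() in io_uring/net.c. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_send(sqe, sockfd, buf, len, 0);

	io_uring_submit(&ring);
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		/* cqe->res is the number of bytes sent, or -errno on failure */
		ret = cqe->res;
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return ret;
}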