summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/attr.c26
-rw-r--r--fs/ext2/dir.c9
-rw-r--r--fs/io_uring.c347
-rw-r--r--fs/nfs/callback_proc.c1
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfs/nfs4file.c1
-rw-r--r--fs/nfs/pnfs.c21
-rw-r--r--fs/nfs/pnfs.h1
8 files changed, 184 insertions, 223 deletions
diff --git a/fs/attr.c b/fs/attr.c
index 66899b6e9bd8..dbe996b0dedf 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -61,9 +61,15 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
const struct inode *inode, kgid_t gid)
{
kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
- (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
- return true;
+ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
+ kgid_t mapped_gid;
+
+ if (gid_eq(gid, inode->i_gid))
+ return true;
+ mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);
+ if (in_group_p(mapped_gid))
+ return true;
+ }
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
if (gid_eq(kgid, INVALID_GID) &&
@@ -123,12 +129,20 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
+ kgid_t mapped_gid;
+
if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
+
+ if (ia_valid & ATTR_GID)
+ mapped_gid = mapped_kgid_fs(mnt_userns,
+ i_user_ns(inode), attr->ia_gid);
+ else
+ mapped_gid = i_gid_into_mnt(mnt_userns, inode);
+
/* Also check the setgid bit! */
- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
- i_gid_into_mnt(mnt_userns, inode)) &&
- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (!in_group_p(mapped_gid) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2c2f179b6977..43de293cef56 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -672,17 +672,14 @@ int ext2_empty_dir (struct inode * inode)
void *page_addr = NULL;
struct page *page = NULL;
unsigned long i, npages = dir_pages(inode);
- int dir_has_error = 0;
for (i = 0; i < npages; i++) {
char *kaddr;
ext2_dirent * de;
- page = ext2_get_page(inode, i, dir_has_error, &page_addr);
+ page = ext2_get_page(inode, i, 0, &page_addr);
- if (IS_ERR(page)) {
- dir_has_error = 1;
- continue;
- }
+ if (IS_ERR(page))
+ goto not_empty;
kaddr = page_addr;
de = (ext2_dirent *)kaddr;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3aab4182fd89..d3ee4fc532fa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -298,8 +298,8 @@ struct io_buffer_list {
/* below is for ring provided buffers */
__u16 buf_nr_pages;
__u16 nr_entries;
- __u32 head;
- __u32 mask;
+ __u16 head;
+ __u16 mask;
};
struct io_buffer {
@@ -576,7 +576,6 @@ struct io_close {
struct file *file;
int fd;
u32 file_slot;
- u32 flags;
};
struct io_timeout_data {
@@ -784,12 +783,6 @@ struct io_msg {
u32 len;
};
-struct io_nop {
- struct file *file;
- u64 extra1;
- u64 extra2;
-};
-
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -851,6 +844,7 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_CQE32_INIT_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
@@ -920,6 +914,8 @@ enum {
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
/* fast poll multishot mode */
REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+ /* ->extra1 and ->extra2 are initialised */
+ REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
};
struct async_poll {
@@ -994,7 +990,6 @@ struct io_kiocb {
struct io_msg msg;
struct io_xattr xattr;
struct io_socket sock;
- struct io_nop nop;
struct io_uring_cmd uring_cmd;
};
@@ -1121,7 +1116,6 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_NOP] = {
.audit_skip = 1,
.iopoll = 1,
- .buffer_select = 1,
},
[IORING_OP_READV] = {
.needs_file = 1,
@@ -1729,9 +1723,16 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return;
- /* don't recycle if we already did IO to this buffer */
- if (req->flags & REQ_F_PARTIAL_IO)
+ /*
+ * For legacy provided buffer mode, don't recycle if we already did
+ * IO to this buffer. For ring-mapped provided buffer mode, we should
+ * increment ring->head to explicitly monopolize the buffer to avoid
+ * multiple use.
+ */
+ if ((req->flags & REQ_F_BUFFER_SELECTED) &&
+ (req->flags & REQ_F_PARTIAL_IO))
return;
+
/*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
@@ -1739,8 +1740,13 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
*/
if (req->flags & REQ_F_BUFFER_RING) {
if (req->buf_list) {
- req->buf_index = req->buf_list->bgid;
- req->flags &= ~REQ_F_BUFFER_RING;
+ if (req->flags & REQ_F_PARTIAL_IO) {
+ req->buf_list->head++;
+ req->buf_list = NULL;
+ } else {
+ req->buf_index = req->buf_list->bgid;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ }
}
return;
}
@@ -2441,94 +2447,66 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
return true;
}
-static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
{
struct io_uring_cqe *cqe;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, user_data);
- WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, cflags);
- return true;
- }
- return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-}
+ if (!(ctx->flags & IORING_SETUP_CQE32)) {
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, 0, 0);
-static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(*cqe));
+ return true;
+ }
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ 0, 0);
+ } else {
+ u64 extra1 = 0, extra2 = 0;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(*cqe));
- return true;
- }
- return io_cqring_event_overflow(ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
-}
+ if (req->flags & REQ_F_CQE32_INIT) {
+ extra1 = req->extra1;
+ extra2 = req->extra2;
+ }
-static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
- u64 extra1 = req->extra1;
- u64 extra2 = req->extra2;
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, extra1, extra2);
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, extra1, extra2);
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+ WRITE_ONCE(cqe->big_cqe[0], extra1);
+ WRITE_ONCE(cqe->big_cqe[1], extra2);
+ return true;
+ }
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
- cqe->big_cqe[0] = extra1;
- cqe->big_cqe[1] = extra2;
- return true;
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ extra1, extra2);
}
-
- return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
- req->cqe.flags, extra1, extra2);
-}
-
-static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
-{
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
- return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
}
-static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
- u64 extra1, u64 extra2)
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
{
- struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
- if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
- return;
- if (req->flags & REQ_F_CQE_SKIP)
- return;
-
- trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
- extra1, extra2);
+ ctx->cq_extra++;
+ trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
/*
* If we can't get a cq entry, userspace overflowed the
@@ -2537,23 +2515,17 @@ static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags
*/
cqe = io_get_cqe(ctx);
if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+ WRITE_ONCE(cqe->user_data, user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- WRITE_ONCE(cqe->big_cqe[0], extra1);
- WRITE_ONCE(cqe->big_cqe[1], extra2);
- return;
- }
- io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
-}
-
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
-{
- ctx->cq_extra++;
- trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
- return __io_fill_cqe(ctx, user_data, res, cflags);
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ WRITE_ONCE(cqe->big_cqe[0], 0);
+ WRITE_ONCE(cqe->big_cqe[1], 0);
+ }
+ return true;
+ }
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
}
static void __io_req_complete_put(struct io_kiocb *req)
@@ -2590,16 +2562,11 @@ static void __io_req_complete_put(struct io_kiocb *req)
static void __io_req_complete_post(struct io_kiocb *req, s32 res,
u32 cflags)
{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, res, cflags);
- __io_req_complete_put(req);
-}
-
-static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+ if (!(req->flags & REQ_F_CQE_SKIP)) {
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
+ __io_fill_cqe_req(req->ctx, req);
+ }
__io_req_complete_put(req);
}
@@ -2614,18 +2581,6 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
io_cqring_ev_posted(ctx);
}
-static void io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- spin_lock(&ctx->completion_lock);
- __io_req_complete_post32(req, res, cflags, extra1, extra2);
- io_commit_cqring(ctx);
- spin_unlock(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
-}
-
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
@@ -2643,19 +2598,6 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
io_req_complete_post(req, res, cflags);
}
-static inline void __io_req_complete32(struct io_kiocb *req,
- unsigned int issue_flags, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
- io_req_complete_state(req, res, cflags);
- req->extra1 = extra1;
- req->extra2 = extra2;
- } else {
- io_req_complete_post32(req, res, cflags, extra1, extra2);
- }
-}
-
static inline void io_req_complete(struct io_kiocb *req, s32 res)
{
if (res < 0)
@@ -3202,12 +3144,8 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- if (!(req->flags & REQ_F_CQE_SKIP)) {
- if (!(ctx->flags & IORING_SETUP_CQE32))
- __io_fill_cqe_req_filled(ctx, req);
- else
- __io_fill_cqe32_req_filled(ctx, req);
- }
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe_req(ctx, req);
}
io_commit_cqring(ctx);
@@ -3326,7 +3264,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
- __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
+
+ req->cqe.flags = io_put_kbuf(req, 0);
+ __io_fill_cqe_req(req->ctx, req);
}
if (unlikely(!nr_events))
@@ -3677,6 +3617,20 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int ret;
kiocb->ki_pos = READ_ONCE(sqe->off);
+ /* used for fixed read/write too - just read unconditionally */
+ req->buf_index = READ_ONCE(sqe->buf_index);
+
+ if (req->opcode == IORING_OP_READ_FIXED ||
+ req->opcode == IORING_OP_WRITE_FIXED) {
+ struct io_ring_ctx *ctx = req->ctx;
+ u16 index;
+
+ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ return -EFAULT;
+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+ req->imu = ctx->user_bufs[index];
+ io_req_set_rsrc_node(req, ctx, 0);
+ }
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
@@ -3689,12 +3643,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
kiocb->ki_ioprio = get_current_ioprio();
}
- req->imu = NULL;
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
req->rw.flags = READ_ONCE(sqe->rw_flags);
- /* used for fixed read/write too - just read unconditionally */
- req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
@@ -3826,20 +3777,9 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter
static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
unsigned int issue_flags)
{
- struct io_mapped_ubuf *imu = req->imu;
- u16 index, buf_index = req->buf_index;
-
- if (likely(!imu)) {
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(buf_index >= ctx->nr_user_bufs))
- return -EFAULT;
- io_req_set_rsrc_node(req, ctx, issue_flags);
- index = array_index_nospec(buf_index, ctx->nr_user_bufs);
- imu = READ_ONCE(ctx->user_bufs[index]);
- req->imu = imu;
- }
- return __io_import_fixed(req, rw, iter, imu);
+ if (WARN_ON_ONCE(!req->imu))
+ return -EFAULT;
+ return __io_import_fixed(req, rw, iter, req->imu);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -3876,19 +3816,17 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
{
struct io_uring_buf_ring *br = bl->buf_ring;
struct io_uring_buf *buf;
- __u32 head = bl->head;
+ __u16 head = bl->head;
- if (unlikely(smp_load_acquire(&br->tail) == head)) {
- io_ring_submit_unlock(req->ctx, issue_flags);
+ if (unlikely(smp_load_acquire(&br->tail) == head))
return NULL;
- }
head &= bl->mask;
if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
buf = &br->bufs[head];
} else {
int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
- int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+ int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
buf = page_address(bl->buf_pages[index]);
buf += off;
}
@@ -3898,7 +3836,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->buf_list = bl;
req->buf_index = buf->bid;
- if (issue_flags & IO_URING_F_UNLOCKED) {
+ if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
/*
* If we came in unlocked, we have no choice but to consume the
* buffer here. This does mean it'll be pinned until the IO
@@ -5079,10 +5017,18 @@ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
req->uring_cmd.task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
- io_req_task_prio_work_add(req);
+ io_req_task_work_add(req);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
+ u64 extra1, u64 extra2)
+{
+ req->extra1 = extra1;
+ req->extra2 = extra2;
+ req->flags |= REQ_F_CQE32_INIT;
+}
+
/*
* Called by consumers of io_uring_cmd, if they originally returned
* -EIOCBQUEUED upon receiving the command.
@@ -5093,10 +5039,10 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
if (ret < 0)
req_set_fail(req);
+
if (req->ctx->flags & IORING_SETUP_CQE32)
- __io_req_complete32(req, 0, ret, 0, res2, 0);
- else
- io_req_complete(req, ret);
+ io_req_set_cqe32_extra(req, res2, 0);
+ io_req_complete(req, ret);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
@@ -5258,14 +5204,6 @@ done:
static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- /*
- * If the ring is setup with CQE32, relay back addr/addr
- */
- if (req->ctx->flags & IORING_SETUP_CQE32) {
- req->nop.extra1 = READ_ONCE(sqe->addr);
- req->nop.extra2 = READ_ONCE(sqe->addr2);
- }
-
return 0;
}
@@ -5274,23 +5212,7 @@ static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
*/
static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
{
- unsigned int cflags;
- void __user *buf;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- size_t len = 1;
-
- buf = io_buffer_select(req, &len, issue_flags);
- if (!buf)
- return -ENOBUFS;
- }
-
- cflags = io_put_kbuf(req, issue_flags);
- if (!(req->ctx->flags & IORING_SETUP_CQE32))
- __io_req_complete(req, issue_flags, 0, cflags);
- else
- __io_req_complete32(req, issue_flags, 0, cflags,
- req->nop.extra1, req->nop.extra2);
+ __io_req_complete(req, issue_flags, 0, 0);
return 0;
}
@@ -5988,18 +5910,14 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->off || sqe->addr || sqe->len || sqe->buf_index)
+ if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
req->close.file_slot = READ_ONCE(sqe->file_index);
- req->close.flags = READ_ONCE(sqe->close_flags);
- if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT)
- return -EINVAL;
- if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) &&
- req->close.file_slot && req->close.fd)
+ if (req->close.file_slot && req->close.fd)
return -EINVAL;
return 0;
@@ -6015,8 +5933,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
if (req->close.file_slot) {
ret = io_close_fixed(req, issue_flags);
- if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT))
- goto err;
+ goto err;
}
spin_lock(&files->file_lock);
@@ -8063,8 +7980,8 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
if (ret < 0)
break;
if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
- ret = -EFAULT;
__io_close_fixed(req, issue_flags, ret);
+ ret = -EFAULT;
break;
}
}
@@ -8773,6 +8690,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
+ io_kbuf_recycle(req, 0);
io_queue_iowq(req, NULL);
break;
case IO_APOLL_OK:
@@ -9788,11 +9706,19 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_files;
int ret;
if (!ctx->file_data)
return -ENXIO;
+
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_files = 0;
ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+ ctx->nr_user_files = nr;
if (!ret)
__io_sqe_files_unregister(ctx);
return ret;
@@ -10690,12 +10616,19 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_bufs;
int ret;
if (!ctx->buf_data)
return -ENXIO;
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_bufs = 0;
ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+ ctx->nr_user_bufs = nr;
if (!ret)
__io_sqe_buffers_unregister(ctx);
return ret;
@@ -13002,6 +12935,10 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!is_power_of_2(reg.ring_entries))
return -EINVAL;
+ /* cannot disambiguate full vs empty due to head/tail size */
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
int ret = io_init_bl_list(ctx);
if (ret)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c8520284dda7..c1eda73254e1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -288,6 +288,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
rv = NFS4_OK;
break;
case -ENOENT:
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
/* Embrace your forgetfulness! */
rv = NFS4ERR_NOMATCHING_LAYOUT;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a8ecdd527662..0c4e8dd6aa96 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2124,6 +2124,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
}
goto out;
}
+ file->f_mode |= FMODE_CAN_ODIRECT;
err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 03d3a270eff4..e88f6b18445e 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -93,6 +93,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
nfs_file_set_open_context(filp, ctx);
nfs_fscache_open_file(inode, filp);
err = 0;
+ filp->f_mode |= FMODE_CAN_ODIRECT;
out_put_ctx:
put_nfs_open_context(ctx);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 68a87be3e6f9..41a9b6b58fb9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -469,6 +469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
pnfs_clear_lseg_state(lseg, lseg_list);
pnfs_clear_layoutreturn_info(lo);
pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
!test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
pnfs_clear_layoutreturn_waitbit(lo);
@@ -1917,8 +1918,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
- if (atomic_dec_and_test(&lo->plh_outstanding))
- wake_up_var(&lo->plh_outstanding);
+ if (atomic_dec_and_test(&lo->plh_outstanding) &&
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
@@ -2025,11 +2027,11 @@ lookup_again:
* If the layout segment list is empty, but there are outstanding
* layoutget calls, then they might be subject to a layoutrecall.
*/
- if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
atomic_read(&lo->plh_outstanding) != 0) {
spin_unlock(&ino->i_lock);
- lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
- !atomic_read(&lo->plh_outstanding)));
+ lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
+ TASK_KILLABLE));
if (IS_ERR(lseg))
goto out_put_layout_hdr;
pnfs_put_layout_hdr(lo);
@@ -2152,6 +2154,12 @@ lookup_again:
case -ERECALLCONFLICT:
case -EAGAIN:
break;
+ case -ENODATA:
+ /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
+ pnfs_layout_set_fail_bit(
+ lo, pnfs_iomode_to_fail_bit(iomode));
+ lseg = NULL;
+ goto out_put_layout_hdr;
default:
if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
@@ -2407,7 +2415,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget;
}
- if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
+ !pnfs_is_first_layoutget(lo))
goto out_forget;
if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 07f11489e4e9..f331f067691b 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -105,6 +105,7 @@ enum {
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */
NFS_LAYOUT_HASHED, /* The layout visible */
+ NFS_LAYOUT_DRAIN,
};
enum layoutdriver_policy_flags {