summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/file.c5
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/aio.c186
-rw-r--r--fs/btrfs/ctree.c17
-rw-r--r--fs/btrfs/ctree.h7
-rw-r--r--fs/btrfs/delalloc-space.c12
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c16
-rw-r--r--fs/btrfs/extent_io.c22
-rw-r--r--fs/btrfs/free-space-tree.c4
-rw-r--r--fs/btrfs/ioctl.c16
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/root-tree.c3
-rw-r--r--fs/btrfs/tree-log.c7
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/btrfs/zoned.c2
-rw-r--r--fs/ceph/caps.c16
-rw-r--r--fs/ceph/file.c20
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/cifs/connect.c7
-rw-r--r--fs/cifs/fs_context.c38
-rw-r--r--fs/cifs/inode.c13
-rw-r--r--fs/cifs/sess.c54
-rw-r--r--fs/file.c72
-rw-r--r--fs/io-wq.c31
-rw-r--r--fs/io_uring.c16
-rw-r--r--fs/ksmbd/ndr.c2
-rw-r--r--fs/ksmbd/smb2ops.c3
-rw-r--r--fs/ksmbd/smb2pdu.c29
-rw-r--r--fs/namespace.c9
-rw-r--r--fs/netfs/read_helper.c21
-rw-r--r--fs/nfsd/nfs3proc.c11
-rw-r--r--fs/nfsd/nfs4recover.c1
-rw-r--r--fs/nfsd/nfs4state.c9
-rw-r--r--fs/nfsd/nfsctl.c14
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/signalfd.c12
-rw-r--r--fs/smbfs_common/cifs_arc4.c13
-rw-r--r--fs/tracefs/inode.c76
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_super.c14
-rw-r--r--fs/zonefs/super.c1
42 files changed, 594 insertions, 217 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c
index cb6ad61eec3b..afe4b803f84b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -514,8 +514,9 @@ static void afs_add_open_mmap(struct afs_vnode *vnode)
if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
down_write(&vnode->volume->cell->fs_open_mmaps_lock);
- list_add_tail(&vnode->cb_mmap_link,
- &vnode->volume->cell->fs_open_mmaps);
+ if (list_empty(&vnode->cb_mmap_link))
+ list_add_tail(&vnode->cb_mmap_link,
+ &vnode->volume->cell->fs_open_mmaps);
up_write(&vnode->volume->cell->fs_open_mmaps_lock);
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index d110def8aa8e..34c68724c98b 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -667,6 +667,7 @@ static void afs_i_init_once(void *_vnode)
INIT_LIST_HEAD(&vnode->pending_locks);
INIT_LIST_HEAD(&vnode->granted_locks);
INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
+ INIT_LIST_HEAD(&vnode->cb_mmap_link);
seqlock_init(&vnode->cb_lock);
}
diff --git a/fs/aio.c b/fs/aio.c
index 9c81cf611d65..f6f1cbffef9e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -181,8 +181,9 @@ struct poll_iocb {
struct file *file;
struct wait_queue_head *head;
__poll_t events;
- bool done;
bool cancelled;
+ bool work_scheduled;
+ bool work_need_resched;
struct wait_queue_entry wait;
struct work_struct work;
};
@@ -1619,6 +1620,51 @@ static void aio_poll_put_work(struct work_struct *work)
iocb_put(iocb);
}
+/*
+ * Safely lock the waitqueue which the request is on, synchronizing with the
+ * case where the ->poll() provider decides to free its waitqueue early.
+ *
+ * Returns true on success, meaning that req->head->lock was locked, req->wait
+ * is on req->head, and an RCU read lock was taken. Returns false if the
+ * request was already removed from its waitqueue (which might no longer exist).
+ */
+static bool poll_iocb_lock_wq(struct poll_iocb *req)
+{
+ wait_queue_head_t *head;
+
+ /*
+ * While we hold the waitqueue lock and the waitqueue is nonempty,
+ * wake_up_pollfree() will wait for us. However, taking the waitqueue
+ * lock in the first place can race with the waitqueue being freed.
+ *
+ * We solve this as eventpoll does: by taking advantage of the fact that
+ * all users of wake_up_pollfree() will RCU-delay the actual free. If
+ * we enter rcu_read_lock() and see that the pointer to the queue is
+ * non-NULL, we can then lock it without the memory being freed out from
+ * under us, then check whether the request is still on the queue.
+ *
+ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+ * case the caller deletes the entry from the queue, leaving it empty.
+ * In that case, only RCU prevents the queue memory from being freed.
+ */
+ rcu_read_lock();
+ head = smp_load_acquire(&req->head);
+ if (head) {
+ spin_lock(&head->lock);
+ if (!list_empty(&req->wait.entry))
+ return true;
+ spin_unlock(&head->lock);
+ }
+ rcu_read_unlock();
+ return false;
+}
+
+static void poll_iocb_unlock_wq(struct poll_iocb *req)
+{
+ spin_unlock(&req->head->lock);
+ rcu_read_unlock();
+}
+
static void aio_poll_complete_work(struct work_struct *work)
{
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1638,14 +1684,27 @@ static void aio_poll_complete_work(struct work_struct *work)
* avoid further branches in the fast path.
*/
spin_lock_irq(&ctx->ctx_lock);
- if (!mask && !READ_ONCE(req->cancelled)) {
- add_wait_queue(req->head, &req->wait);
- spin_unlock_irq(&ctx->ctx_lock);
- return;
- }
+ if (poll_iocb_lock_wq(req)) {
+ if (!mask && !READ_ONCE(req->cancelled)) {
+ /*
+ * The request isn't actually ready to be completed yet.
+ * Reschedule completion if another wakeup came in.
+ */
+ if (req->work_need_resched) {
+ schedule_work(&req->work);
+ req->work_need_resched = false;
+ } else {
+ req->work_scheduled = false;
+ }
+ poll_iocb_unlock_wq(req);
+ spin_unlock_irq(&ctx->ctx_lock);
+ return;
+ }
+ list_del_init(&req->wait.entry);
+ poll_iocb_unlock_wq(req);
+ } /* else, POLLFREE has freed the waitqueue, so we must complete */
list_del_init(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
- req->done = true;
spin_unlock_irq(&ctx->ctx_lock);
iocb_put(iocb);
@@ -1657,13 +1716,14 @@ static int aio_poll_cancel(struct kiocb *iocb)
struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
struct poll_iocb *req = &aiocb->poll;
- spin_lock(&req->head->lock);
- WRITE_ONCE(req->cancelled, true);
- if (!list_empty(&req->wait.entry)) {
- list_del_init(&req->wait.entry);
- schedule_work(&aiocb->poll.work);
- }
- spin_unlock(&req->head->lock);
+ if (poll_iocb_lock_wq(req)) {
+ WRITE_ONCE(req->cancelled, true);
+ if (!req->work_scheduled) {
+ schedule_work(&aiocb->poll.work);
+ req->work_scheduled = true;
+ }
+ poll_iocb_unlock_wq(req);
+ } /* else, the request was force-cancelled by POLLFREE already */
return 0;
}
@@ -1680,21 +1740,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && !(mask & req->events))
return 0;
- list_del_init(&req->wait.entry);
-
- if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+ /*
+ * Complete the request inline if possible. This requires that three
+ * conditions be met:
+ * 1. An event mask must have been passed. If a plain wakeup was done
+ * instead, then mask == 0 and we have to call vfs_poll() to get
+ * the events, so inline completion isn't possible.
+ * 2. The completion work must not have already been scheduled.
+ * 3. ctx_lock must not be busy. We have to use trylock because we
+ * already hold the waitqueue lock, so this inverts the normal
+ * locking order. Use irqsave/irqrestore because not all
+ * filesystems (e.g. fuse) call this function with IRQs disabled,
+ * yet IRQs have to be disabled before ctx_lock is obtained.
+ */
+ if (mask && !req->work_scheduled &&
+ spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
struct kioctx *ctx = iocb->ki_ctx;
- /*
- * Try to complete the iocb inline if we can. Use
- * irqsave/irqrestore because not all filesystems (e.g. fuse)
- * call this function with IRQs disabled and because IRQs
- * have to be disabled before ctx_lock is obtained.
- */
+ list_del_init(&req->wait.entry);
list_del(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
- req->done = true;
- if (iocb->ki_eventfd && eventfd_signal_allowed()) {
+ if (iocb->ki_eventfd && !eventfd_signal_allowed()) {
iocb = NULL;
INIT_WORK(&req->work, aio_poll_put_work);
schedule_work(&req->work);
@@ -1703,7 +1769,43 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (iocb)
iocb_put(iocb);
} else {
- schedule_work(&req->work);
+ /*
+ * Schedule the completion work if needed. If it was already
+ * scheduled, record that another wakeup came in.
+ *
+ * Don't remove the request from the waitqueue here, as it might
+ * not actually be complete yet (we won't know until vfs_poll()
+ * is called), and we must not miss any wakeups. POLLFREE is an
+ * exception to this; see below.
+ */
+ if (req->work_scheduled) {
+ req->work_need_resched = true;
+ } else {
+ schedule_work(&req->work);
+ req->work_scheduled = true;
+ }
+
+ /*
+ * If the waitqueue is being freed early but we can't complete
+ * the request inline, we have to tear down the request as best
+ * we can. That means immediately removing the request from its
+ * waitqueue and preventing all further accesses to the
+ * waitqueue via the request. We also need to schedule the
+ * completion work (done above). Also mark the request as
+ * cancelled, to potentially skip an unneeded call to ->poll().
+ */
+ if (mask & POLLFREE) {
+ WRITE_ONCE(req->cancelled, true);
+ list_del_init(&req->wait.entry);
+
+ /*
+ * Careful: this *must* be the last step, since as soon
+ * as req->head is NULL'ed out, the request can be
+ * completed and freed, since aio_poll_complete_work()
+ * will no longer need to take the waitqueue lock.
+ */
+ smp_store_release(&req->head, NULL);
+ }
}
return 1;
}
@@ -1711,6 +1813,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
struct aio_poll_table {
struct poll_table_struct pt;
struct aio_kiocb *iocb;
+ bool queued;
int error;
};
@@ -1721,11 +1824,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
/* multiple wait queues per file are not supported */
- if (unlikely(pt->iocb->poll.head)) {
+ if (unlikely(pt->queued)) {
pt->error = -EINVAL;
return;
}
+ pt->queued = true;
pt->error = 0;
pt->iocb->poll.head = head;
add_wait_queue(head, &pt->iocb->poll.wait);
@@ -1750,12 +1854,14 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
req->head = NULL;
- req->done = false;
req->cancelled = false;
+ req->work_scheduled = false;
+ req->work_need_resched = false;
apt.pt._qproc = aio_poll_queue_proc;
apt.pt._key = req->events;
apt.iocb = aiocb;
+ apt.queued = false;
apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
/* initialized the list so that we can do list_empty checks */
@@ -1764,23 +1870,35 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
mask = vfs_poll(req->file, &apt.pt) & req->events;
spin_lock_irq(&ctx->ctx_lock);
- if (likely(req->head)) {
- spin_lock(&req->head->lock);
- if (unlikely(list_empty(&req->wait.entry))) {
- if (apt.error)
+ if (likely(apt.queued)) {
+ bool on_queue = poll_iocb_lock_wq(req);
+
+ if (!on_queue || req->work_scheduled) {
+ /*
+ * aio_poll_wake() already either scheduled the async
+ * completion work, or completed the request inline.
+ */
+ if (apt.error) /* unsupported case: multiple queues */
cancel = true;
apt.error = 0;
mask = 0;
}
if (mask || apt.error) {
+ /* Steal to complete synchronously. */
list_del_init(&req->wait.entry);
} else if (cancel) {
+ /* Cancel if possible (may be too late though). */
WRITE_ONCE(req->cancelled, true);
- } else if (!req->done) { /* actually waiting for an event */
+ } else if (on_queue) {
+ /*
+ * Actually waiting for an event, so add the request to
+ * active_reqs so that it can be cancelled if needed.
+ */
list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
aiocb->ki_cancel = aio_poll_cancel;
}
- spin_unlock(&req->head->lock);
+ if (on_queue)
+ poll_iocb_unlock_wq(req);
}
if (mask) { /* no async, we'd stolen it */
aiocb->ki_res.res = mangle_poll(mask);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3983bdaf4b8..f704339c6b86 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -463,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
BUG_ON(ret < 0);
rcu_assign_pointer(root->node, cow);
- btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
+ parent_start, last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -485,8 +485,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
}
- btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
+ parent_start, last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -927,7 +927,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
free_extent_buffer(mid);
root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
/* once for the root ptr */
free_extent_buffer_stale(mid);
return 0;
@@ -986,7 +986,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
- btrfs_free_tree_block(trans, root, right, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), right,
+ 0, 1);
free_extent_buffer_stale(right);
right = NULL;
} else {
@@ -1031,7 +1032,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid);
mid = NULL;
} else {
@@ -4032,7 +4033,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
atomic_inc(&leaf->refs);
- btrfs_free_tree_block(trans, root, leaf, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
free_extent_buffer_stale(leaf);
}
/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7553e9dc5f93..5fe5eccb3c87 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2257,6 +2257,11 @@ static inline bool btrfs_root_dead(const struct btrfs_root *root)
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
}
+static inline u64 btrfs_root_id(const struct btrfs_root *root)
+{
+ return root->root_key.objectid;
+}
+
/* struct btrfs_root_backup */
BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
tree_root, 64);
@@ -2719,7 +2724,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
u64 empty_size,
enum btrfs_lock_nesting nest);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ u64 root_id,
struct extent_buffer *buf,
u64 parent, int last_ref);
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 2059d1504149..40c4d6ba3fb9 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -143,10 +143,13 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
- if (ret < 0)
+ if (ret < 0) {
btrfs_free_reserved_data_space_noquota(fs_info, len);
- else
+ extent_changeset_free(*reserved);
+ *reserved = NULL;
+ } else {
ret = 0;
+ }
return ret;
}
@@ -452,8 +455,11 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(inode, len);
- if (ret < 0)
+ if (ret < 0) {
btrfs_free_reserved_data_space(inode, *reserved, start, len);
+ extent_changeset_free(*reserved);
+ *reserved = NULL;
+ }
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 514ead6e93b6..b3f2e2232326 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1732,6 +1732,14 @@ again:
}
return root;
fail:
+ /*
+ * If our caller provided us an anonymous device, then it's his
+ * responsability to free it in case we fail. So we have to set our
+ * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
+ * and once again by our caller.
+ */
+ if (anon_dev)
+ root->anon_dev = 0;
btrfs_put_root(root);
return ERR_PTR(ret);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3fd736a02c1e..25ef6e3fd306 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3275,20 +3275,20 @@ out_delayed_unlock:
}
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ u64 root_id,
struct extent_buffer *buf,
u64 parent, int last_ref)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_ref generic_ref = { 0 };
int ret;
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
buf->start, buf->len, parent);
btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
- root->root_key.objectid, 0, false);
+ root_id, 0, false);
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
+ if (root_id != BTRFS_TREE_LOG_OBJECTID) {
btrfs_ref_tree_mod(fs_info, &generic_ref);
ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
BUG_ON(ret); /* -ENOMEM */
@@ -3298,7 +3298,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_block_group *cache;
bool must_pin = false;
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
+ if (root_id != BTRFS_TREE_LOG_OBJECTID) {
ret = check_ref_cleanup(trans, buf->start);
if (!ret) {
btrfs_redirty_list_add(trans->transaction, buf);
@@ -5472,7 +5472,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
goto owner_mismatch;
}
- btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
+ wc->refs[level] == 1);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;
@@ -6051,6 +6052,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
int dev_ret = 0;
int ret = 0;
+ if (range->start == U64_MAX)
+ return -EINVAL;
+
/*
* Check range overflow if range->len is set.
* The default range->len is U64_MAX.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4e03a6d3aa32..9234d96a7fd5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4314,6 +4314,20 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
return;
/*
+ * A read may stumble upon this buffer later, make sure that it gets an
+ * error and knows there was an error.
+ */
+ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+
+ /*
+ * We need to set the mapping with the io error as well because a write
+ * error will flip the file system readonly, and then syncfs() will
+ * return a 0 because we are readonly if we don't modify the err seq for
+ * the superblock.
+ */
+ mapping_set_error(page->mapping, -EIO);
+
+ /*
* If we error out, we should add back the dirty_metadata_bytes
* to make it consistent.
*/
@@ -6597,6 +6611,14 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
+ /*
+ * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
+ * operation, which could potentially still be in flight. In this case
+ * we simply want to return an error.
+ */
+ if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
+ return -EIO;
+
if (eb->fs_info->sectorsize < PAGE_SIZE)
return read_extent_buffer_subpage(eb, wait, mirror_num);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index a33bca94d133..3abec44c6255 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1256,8 +1256,8 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_tree_lock(free_space_root->node);
btrfs_clean_tree_block(free_space_root->node);
btrfs_tree_unlock(free_space_root->node);
- btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
- 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
+ free_space_root->node, 0, 1);
btrfs_put_root(free_space_root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 92138ac2a4e2..edfecfe62b4b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -617,11 +617,13 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
* Since we don't abort the transaction in this case, free the
* tree block so that we don't leak space and leave the
* filesystem in an inconsistent state (an extent item in the
- * extent tree without backreferences). Also no need to have
- * the tree block locked since it is not in any tree at this
- * point, so no other task can find it and use it.
+ * extent tree with a backreference for a root that does not
+ * exists).
*/
- btrfs_free_tree_block(trans, root, leaf, 0, 1);
+ btrfs_tree_lock(leaf);
+ btrfs_clean_tree_block(leaf);
+ btrfs_tree_unlock(leaf);
+ btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
free_extent_buffer(leaf);
goto fail;
}
@@ -3187,10 +3189,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
return -EPERM;
vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args)) {
- ret = PTR_ERR(vol_args);
- goto out;
- }
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
ret = -EOPNOTSUPP;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index db680f5be745..6c037f1252b7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1219,7 +1219,8 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_tree_lock(quota_root->node);
btrfs_clean_tree_block(quota_root->node);
btrfs_tree_unlock(quota_root->node);
- btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
+ quota_root->node, 0, 1);
btrfs_put_root(quota_root);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 12ceb14a1141..d20166336557 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -334,7 +334,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
key.offset = ref_id;
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- BUG_ON(ret < 0);
+ if (ret < 0)
+ goto out;
if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8ab33caf016f..6993dcdba6f1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1181,6 +1181,7 @@ again:
parent_objectid, victim_name,
victim_name_len);
if (ret < 0) {
+ kfree(victim_name);
return ret;
} else if (!ret) {
ret = -ENOENT;
@@ -2908,6 +2909,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
path->nodes[*level]->len);
if (ret)
return ret;
+ btrfs_redirty_list_add(trans->transaction,
+ next);
} else {
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
clear_extent_buffer_dirty(next);
@@ -2988,6 +2991,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next->start, next->len);
if (ret)
goto out;
+ btrfs_redirty_list_add(trans->transaction, next);
} else {
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
clear_extent_buffer_dirty(next);
@@ -3438,8 +3442,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
extent_io_tree_release(&log->log_csum_range);
- if (trans && log->node)
- btrfs_redirty_list_add(trans->transaction, log->node);
btrfs_put_root(log);
}
@@ -3976,6 +3978,7 @@ search:
goto done;
}
if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
+ ctx->last_dir_item_offset = min_key.offset;
ret = overwrite_item(trans, log, dst_path,
path->nodes[0], path->slots[0],
&min_key);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0997e3cd74e9..fd0ced829edb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1370,8 +1370,10 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
bytenr_orig = btrfs_sb_offset(0);
ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
- if (ret)
- return ERR_PTR(ret);
+ if (ret) {
+ device = ERR_PTR(ret);
+ goto error_bdev_put;
+ }
disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
if (IS_ERR(disk_super)) {
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 67d932d70798..678a29469511 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1860,6 +1860,7 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
btrfs_clear_treelog_bg(block_group);
+ btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
@@ -1942,6 +1943,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
ASSERT(block_group->alloc_offset == block_group->zone_capacity);
ASSERT(block_group->free_space_ctl->free_space == 0);
btrfs_clear_treelog_bg(block_group);
+ btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
map = block_group->physical_map;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b9460b6fb76f..c447fa2e2d1f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4350,7 +4350,7 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
int bits = (fmode << 1) | 1;
- bool is_opened = false;
+ bool already_opened = false;
int i;
if (count == 1)
@@ -4358,19 +4358,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
spin_lock(&ci->i_ceph_lock);
for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
- if (bits & (1 << i))
- ci->i_nr_by_mode[i] += count;
-
/*
- * If any of the mode ref is larger than 1,
+ * If any of the mode ref is larger than 0,
* that means it has been already opened by
* others. Just skip checking the PIN ref.
*/
- if (i && ci->i_nr_by_mode[i] > 1)
- is_opened = true;
+ if (i && ci->i_nr_by_mode[i])
+ already_opened = true;
+
+ if (bits & (1 << i))
+ ci->i_nr_by_mode[i] += count;
}
- if (!is_opened)
+ if (!already_opened)
percpu_counter_inc(&mdsc->metric.opened_inodes);
spin_unlock(&ci->i_ceph_lock);
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 02a0a0fd9ccd..c138e8126286 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -605,13 +605,25 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
in.cap.realm = cpu_to_le64(ci->i_snap_realm->ino);
in.cap.flags = CEPH_CAP_FLAG_AUTH;
in.ctime = in.mtime = in.atime = iinfo.btime;
- in.mode = cpu_to_le32((u32)mode);
in.truncate_seq = cpu_to_le32(1);
in.truncate_size = cpu_to_le64(-1ULL);
in.xattr_version = cpu_to_le64(1);
in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid()));
- in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_mode & S_ISGID ?
- dir->i_gid : current_fsgid()));
+ if (dir->i_mode & S_ISGID) {
+ in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid));
+
+ /* Directories always inherit the setgid bit. */
+ if (S_ISDIR(mode))
+ mode |= S_ISGID;
+ else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
+ !in_group_p(dir->i_gid) &&
+ !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID))
+ mode &= ~S_ISGID;
+ } else {
+ in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid()));
+ }
+ in.mode = cpu_to_le32((u32)mode);
+
in.nlink = cpu_to_le32(1);
in.max_size = cpu_to_le64(lo->stripe_unit);
@@ -847,7 +859,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
ssize_t ret;
u64 off = iocb->ki_pos;
u64 len = iov_iter_count(to);
- u64 i_size;
+ u64 i_size = i_size_read(inode);
dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 250aad330a10..c30eefc0ac19 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3683,7 +3683,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_pagelist *pagelist = recon_state->pagelist;
struct dentry *dentry;
char *path;
- int pathlen, err;
+ int pathlen = 0, err;
u64 pathbase;
u64 snap_follows;
@@ -3703,7 +3703,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
} else {
path = NULL;
- pathlen = 0;
pathbase = 0;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 18448dbd762a..1060164b984a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3064,6 +3064,13 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
(cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
+ /*
+ * The cookie is initialized from volume info returned above.
+ * Inside cifs_fscache_get_super_cookie it checks
+ * that we do not get super cookie twice.
+ */
+ cifs_fscache_get_super_cookie(tcon);
+
out:
mnt_ctx->server = server;
mnt_ctx->ses = ses;
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 6a179ae753c1..e3ed25dc6f3f 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -435,6 +435,42 @@ out:
}
/*
+ * Remove duplicate path delimiters. Windows is supposed to do that
+ * but there are some bugs that prevent rename from working if there are
+ * multiple delimiters.
+ *
+ * Returns a sanitized duplicate of @path. The caller is responsible for
+ * cleaning up the original.
+ */
+#define IS_DELIM(c) ((c) == '/' || (c) == '\\')
+static char *sanitize_path(char *path)
+{
+ char *cursor1 = path, *cursor2 = path;
+
+ /* skip all prepended delimiters */
+ while (IS_DELIM(*cursor1))
+ cursor1++;
+
+ /* copy the first letter */
+ *cursor2 = *cursor1;
+
+ /* copy the remainder... */
+ while (*(cursor1++)) {
+ /* ... skipping all duplicated delimiters */
+ if (IS_DELIM(*cursor1) && IS_DELIM(*cursor2))
+ continue;
+ *(++cursor2) = *cursor1;
+ }
+
+ /* if the last character is a delimiter, skip it */
+ if (IS_DELIM(*(cursor2 - 1)))
+ cursor2--;
+
+ *(cursor2) = '\0';
+ return kstrdup(path, GFP_KERNEL);
+}
+
+/*
* Parse a devname into substrings and populate the ctx->UNC and ctx->prepath
* fields with the result. Returns 0 on success and an error otherwise
* (e.g. ENOMEM or EINVAL)
@@ -493,7 +529,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
if (!*pos)
return 0;
- ctx->prepath = kstrdup(pos, GFP_KERNEL);
+ ctx->prepath = sanitize_path(pos);
if (!ctx->prepath)
return -ENOMEM;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 96d083db1737..279622e4eb1c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1356,11 +1356,6 @@ iget_no_retry:
goto out;
}
-#ifdef CONFIG_CIFS_FSCACHE
- /* populate tcon->resource_id */
- tcon->resource_id = CIFS_I(inode)->uniqueid;
-#endif
-
if (rc && tcon->pipe) {
cifs_dbg(FYI, "ipc connection - fake read inode\n");
spin_lock(&inode->i_lock);
@@ -1375,14 +1370,6 @@ iget_no_retry:
iget_failed(inode);
inode = ERR_PTR(rc);
}
-
- /*
- * The cookie is initialized from volume info returned above.
- * Inside cifs_fscache_get_super_cookie it checks
- * that we do not get super cookie twice.
- */
- cifs_fscache_get_super_cookie(tcon);
-
out:
kfree(path);
free_xid(xid);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index af63548eaf26..035dc3e245dc 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -590,8 +590,8 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
{
unsigned int tioffset; /* challenge message target info area */
unsigned int tilen; /* challenge message target info area length */
-
CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
+ __u32 server_flags;
if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
cifs_dbg(VFS, "challenge blob len %d too small\n", blob_len);
@@ -609,12 +609,37 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
return -EINVAL;
}
+ server_flags = le32_to_cpu(pblob->NegotiateFlags);
+ cifs_dbg(FYI, "%s: negotiate=0x%08x challenge=0x%08x\n", __func__,
+ ses->ntlmssp->client_flags, server_flags);
+
+ if ((ses->ntlmssp->client_flags & (NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_SIGN)) &&
+ (!(server_flags & NTLMSSP_NEGOTIATE_56) && !(server_flags & NTLMSSP_NEGOTIATE_128))) {
+ cifs_dbg(VFS, "%s: requested signing/encryption but server did not return either 56-bit or 128-bit session key size\n",
+ __func__);
+ return -EINVAL;
+ }
+ if (!(server_flags & NTLMSSP_NEGOTIATE_NTLM) && !(server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) {
+ cifs_dbg(VFS, "%s: server does not seem to support either NTLMv1 or NTLMv2\n", __func__);
+ return -EINVAL;
+ }
+ if (ses->server->sign && !(server_flags & NTLMSSP_NEGOTIATE_SIGN)) {
+ cifs_dbg(VFS, "%s: forced packet signing but server does not seem to support it\n",
+ __func__);
+ return -EOPNOTSUPP;
+ }
+ if ((ses->ntlmssp->client_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
+ !(server_flags & NTLMSSP_NEGOTIATE_KEY_XCH))
+ pr_warn_once("%s: authentication has been weakened as server does not support key exchange\n",
+ __func__);
+
+ ses->ntlmssp->server_flags = server_flags;
+
memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
- /* BB we could decode pblob->NegotiateFlags; some may be useful */
/* In particular we can examine sign flags */
/* BB spec says that if AvId field of MsvAvTimestamp is populated then
we must set the MIC field of the AUTHENTICATE_MESSAGE */
- ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
+
tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
if (tioffset > blob_len || tioffset + tilen > blob_len) {
@@ -721,13 +746,13 @@ int build_ntlmssp_negotiate_blob(unsigned char **pbuffer,
flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
- NTLMSSP_NEGOTIATE_SEAL;
- if (server->sign)
- flags |= NTLMSSP_NEGOTIATE_SIGN;
+ NTLMSSP_NEGOTIATE_ALWAYS_SIGN | NTLMSSP_NEGOTIATE_SEAL |
+ NTLMSSP_NEGOTIATE_SIGN;
if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
tmp = *pbuffer + sizeof(NEGOTIATE_MESSAGE);
+ ses->ntlmssp->client_flags = flags;
sec_blob->NegotiateFlags = cpu_to_le32(flags);
/* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */
@@ -779,15 +804,8 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
- flags = NTLMSSP_NEGOTIATE_56 |
- NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
- NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
- NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
- if (ses->server->sign)
- flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
- flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+ flags = ses->ntlmssp->server_flags | NTLMSSP_REQUEST_TARGET |
+ NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
@@ -834,9 +852,9 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
*pbuffer, &tmp,
nls_cp);
- if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) ||
- (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
- && !calc_seckey(ses)) {
+ if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
+ (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess) &&
+ !calc_seckey(ses)) {
memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
diff --git a/fs/file.c b/fs/file.c
index ad4a8bf3cf10..97d212a9b814 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -841,28 +841,68 @@ void do_close_on_exec(struct files_struct *files)
spin_unlock(&files->file_lock);
}
-static struct file *__fget_files(struct files_struct *files, unsigned int fd,
- fmode_t mask, unsigned int refs)
+static inline struct file *__fget_files_rcu(struct files_struct *files,
+ unsigned int fd, fmode_t mask, unsigned int refs)
{
- struct file *file;
+ for (;;) {
+ struct file *file;
+ struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+ struct file __rcu **fdentry;
- rcu_read_lock();
-loop:
- file = files_lookup_fd_rcu(files, fd);
- if (file) {
- /* File object ref couldn't be taken.
- * dup2() atomicity guarantee is the reason
- * we loop to catch the new file (or NULL pointer)
+ if (unlikely(fd >= fdt->max_fds))
+ return NULL;
+
+ fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
+ file = rcu_dereference_raw(*fdentry);
+ if (unlikely(!file))
+ return NULL;
+
+ if (unlikely(file->f_mode & mask))
+ return NULL;
+
+ /*
+ * Ok, we have a file pointer. However, because we do
+ * this all locklessly under RCU, we may be racing with
+ * that file being closed.
+ *
+ * Such a race can take two forms:
+ *
+ * (a) the file ref already went down to zero,
+ * and get_file_rcu_many() fails. Just try
+ * again:
*/
- if (file->f_mode & mask)
- file = NULL;
- else if (!get_file_rcu_many(file, refs))
- goto loop;
- else if (files_lookup_fd_raw(files, fd) != file) {
+ if (unlikely(!get_file_rcu_many(file, refs)))
+ continue;
+
+ /*
+ * (b) the file table entry has changed under us.
+ * Note that we don't need to re-check the 'fdt->fd'
+ * pointer having changed, because it always goes
+ * hand-in-hand with 'fdt'.
+ *
+ * If so, we need to put our refs and try again.
+ */
+ if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
+ unlikely(rcu_dereference_raw(*fdentry) != file)) {
fput_many(file, refs);
- goto loop;
+ continue;
}
+
+ /*
+ * Ok, we have a ref to the file, and checked that it
+ * still exists.
+ */
+ return file;
}
+}
+
+static struct file *__fget_files(struct files_struct *files, unsigned int fd,
+ fmode_t mask, unsigned int refs)
+{
+ struct file *file;
+
+ rcu_read_lock();
+ file = __fget_files_rcu(files, fd, mask, refs);
rcu_read_unlock();
return file;
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 50cf9f92da36..5c4f582d6549 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match);
static void create_worker_cb(struct callback_head *cb);
+static void io_wq_cancel_tw_create(struct io_wq *wq);
static bool io_worker_get(struct io_worker *worker)
{
@@ -357,12 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker,
test_and_set_bit_lock(0, &worker->create_state))
goto fail_release;
+ atomic_inc(&wq->worker_refs);
init_task_work(&worker->create_work, func);
worker->create_index = acct->index;
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
- clear_bit_unlock(0, &worker->create_state);
+ /*
+ * EXIT may have been set after checking it above, check after
+ * adding the task_work and remove any creation item if it is
+ * now set. wq exit does that too, but we can have added this
+ * work item after we canceled in io_wq_exit_workers().
+ */
+ if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
+ io_wq_cancel_tw_create(wq);
+ io_worker_ref_put(wq);
return true;
}
+ io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state);
fail_release:
io_worker_release(worker);
@@ -384,7 +395,9 @@ static void io_wqe_dec_running(struct io_worker *worker)
if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
+ raw_spin_unlock(&wqe->lock);
io_queue_worker_create(worker, acct, create_worker_cb);
+ raw_spin_lock(&wqe->lock);
}
}
@@ -1198,13 +1211,9 @@ void io_wq_exit_start(struct io_wq *wq)
set_bit(IO_WQ_BIT_EXIT, &wq->state);
}
-static void io_wq_exit_workers(struct io_wq *wq)
+static void io_wq_cancel_tw_create(struct io_wq *wq)
{
struct callback_head *cb;
- int node;
-
- if (!wq->task)
- return;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
struct io_worker *worker;
@@ -1212,6 +1221,16 @@ static void io_wq_exit_workers(struct io_wq *wq)
worker = container_of(cb, struct io_worker, create_work);
io_worker_cancel_cb(worker);
}
+}
+
+static void io_wq_exit_workers(struct io_wq *wq)
+{
+ int node;
+
+ if (!wq->task)
+ return;
+
+ io_wq_cancel_tw_create(wq);
rcu_read_lock();
for_each_node(node) {
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c4f217613f56..fb2a0cb4aaf8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2891,9 +2891,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
kiocb->ki_pos = READ_ONCE(sqe->off);
- if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) {
- req->flags |= REQ_F_CUR_POS;
- kiocb->ki_pos = file->f_pos;
+ if (kiocb->ki_pos == -1) {
+ if (!(file->f_mode & FMODE_STREAM)) {
+ req->flags |= REQ_F_CUR_POS;
+ kiocb->ki_pos = file->f_pos;
+ } else {
+ kiocb->ki_pos = 0;
+ }
}
kiocb->ki_flags = iocb_flags(file);
ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
@@ -9824,7 +9828,7 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
/*
* Find any io_uring ctx that this task has registered or done IO on, and cancel
- * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation.
+ * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
*/
static __cold void io_uring_cancel_generic(bool cancel_all,
struct io_sq_data *sqd)
@@ -9866,8 +9870,10 @@ static __cold void io_uring_cancel_generic(bool cancel_all,
cancel_all);
}
- prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
+ io_run_task_work();
io_uring_drop_tctx_refs(current);
+
/*
* If we've seen completions, retry without waiting. This
* avoids a race where a completion comes in before we did
diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
index 8317f7ca402b..5052be9261d9 100644
--- a/fs/ksmbd/ndr.c
+++ b/fs/ksmbd/ndr.c
@@ -148,7 +148,7 @@ static int ndr_read_int16(struct ndr *n, __u16 *value)
static int ndr_read_int32(struct ndr *n, __u32 *value)
{
if (n->offset + sizeof(__u32) > n->length)
- return 0;
+ return -EINVAL;
if (value)
*value = le32_to_cpu(*(__le32 *)ndr_get_field(n));
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
index 0a5d8450e835..02a44d28bdaf 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/ksmbd/smb2ops.c
@@ -271,9 +271,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
- if (conn->cipher_type)
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
-
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 49c9da37315c..b8b3a4c28b74 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -915,6 +915,25 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
}
}
+/**
+ * smb3_encryption_negotiated() - checks if server and client agreed on enabling encryption
+ * @conn: smb connection
+ *
+ * Return: true if connection should be encrypted, else false
+ */
+static bool smb3_encryption_negotiated(struct ksmbd_conn *conn)
+{
+ if (!conn->ops->generate_encryptionkey)
+ return false;
+
+ /*
+ * SMB 3.0 and 3.0.2 dialects use the SMB2_GLOBAL_CAP_ENCRYPTION flag.
+ * SMB 3.1.1 uses the cipher_type field.
+ */
+ return (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) ||
+ conn->cipher_type;
+}
+
static void decode_compress_ctxt(struct ksmbd_conn *conn,
struct smb2_compression_capabilities_context *pneg_ctxt)
{
@@ -1469,8 +1488,7 @@ static int ntlm_authenticate(struct ksmbd_work *work)
(req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
sess->sign = true;
- if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION &&
- conn->ops->generate_encryptionkey &&
+ if (smb3_encryption_negotiated(conn) &&
!(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
rc = conn->ops->generate_encryptionkey(sess);
if (rc) {
@@ -1559,8 +1577,7 @@ static int krb5_authenticate(struct ksmbd_work *work)
(req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
sess->sign = true;
- if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) &&
- conn->ops->generate_encryptionkey) {
+ if (smb3_encryption_negotiated(conn)) {
retval = conn->ops->generate_encryptionkey(sess);
if (retval) {
ksmbd_debug(SMB,
@@ -2962,6 +2979,10 @@ int smb2_open(struct ksmbd_work *work)
&pntsd_size, &fattr);
posix_acl_release(fattr.cf_acls);
posix_acl_release(fattr.cf_dacls);
+ if (rc) {
+ kfree(pntsd);
+ goto err_out;
+ }
rc = ksmbd_vfs_set_sd_xattr(conn,
user_ns,
diff --git a/fs/namespace.c b/fs/namespace.c
index 659a8f39c61a..b696543adab8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4263,12 +4263,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
return err;
err = user_path_at(dfd, path, kattr.lookup_flags, &target);
- if (err)
- return err;
-
- err = do_mount_setattr(&target, &kattr);
+ if (!err) {
+ err = do_mount_setattr(&target, &kattr);
+ path_put(&target);
+ }
finish_mount_kattr(&kattr);
- path_put(&target);
return err;
}
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 7046f9bdd8dc..75c76cbb27cc 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -354,16 +354,11 @@ static void netfs_rreq_write_to_cache_work(struct work_struct *work)
netfs_rreq_do_write_to_cache(rreq);
}
-static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
- bool was_async)
+static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq)
{
- if (was_async) {
- rreq->work.func = netfs_rreq_write_to_cache_work;
- if (!queue_work(system_unbound_wq, &rreq->work))
- BUG();
- } else {
- netfs_rreq_do_write_to_cache(rreq);
- }
+ rreq->work.func = netfs_rreq_write_to_cache_work;
+ if (!queue_work(system_unbound_wq, &rreq->work))
+ BUG();
}
/*
@@ -558,7 +553,7 @@ again:
wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
- return netfs_rreq_write_to_cache(rreq, was_async);
+ return netfs_rreq_write_to_cache(rreq);
netfs_rreq_completed(rreq, was_async);
}
@@ -960,7 +955,7 @@ int netfs_readpage(struct file *file,
rreq = netfs_alloc_read_request(ops, netfs_priv, file);
if (!rreq) {
if (netfs_priv)
- ops->cleanup(netfs_priv, folio_file_mapping(folio));
+ ops->cleanup(folio_file_mapping(folio), netfs_priv);
folio_unlock(folio);
return -ENOMEM;
}
@@ -1191,7 +1186,7 @@ have_folio:
goto error;
have_folio_no_wait:
if (netfs_priv)
- ops->cleanup(netfs_priv, mapping);
+ ops->cleanup(mapping, netfs_priv);
*_folio = folio;
_leave(" = 0");
return 0;
@@ -1202,7 +1197,7 @@ error:
folio_unlock(folio);
folio_put(folio);
if (netfs_priv)
- ops->cleanup(netfs_priv, mapping);
+ ops->cleanup(mapping, netfs_priv);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 4418517f6f12..15dac36ca852 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -438,22 +438,19 @@ nfsd3_proc_link(struct svc_rqst *rqstp)
static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
struct nfsd3_readdirres *resp,
- int count)
+ u32 count)
{
struct xdr_buf *buf = &resp->dirlist;
struct xdr_stream *xdr = &resp->xdr;
- count = min_t(u32, count, svc_max_payload(rqstp));
+ count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp));
memset(buf, 0, sizeof(*buf));
/* Reserve room for the NULL ptr & eof flag (-2 words) */
buf->buflen = count - XDR_UNIT * 2;
buf->pages = rqstp->rq_next_page;
- while (count > 0) {
- rqstp->rq_next_page++;
- count -= PAGE_SIZE;
- }
+ rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
/* This is xdr_init_encode(), but it assumes that
* the head kvec has already been consumed. */
@@ -462,7 +459,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
xdr->page_ptr = buf->pages;
xdr->iov = NULL;
xdr->p = page_address(*buf->pages);
- xdr->end = xdr->p + (PAGE_SIZE >> 2);
+ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
xdr->rqst = NULL;
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 6fedc49726bf..c634483d85d2 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -2156,6 +2156,7 @@ static struct notifier_block nfsd4_cld_block = {
int
register_cld_notifier(void)
{
+ WARN_ON(!nfsd_net_id);
return rpc_pipefs_notifier_register(&nfsd4_cld_block);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bfad94c70b84..1956d377d1a6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1207,6 +1207,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
return 0;
}
+static bool delegation_hashed(struct nfs4_delegation *dp)
+{
+ return !(list_empty(&dp->dl_perfile));
+}
+
static bool
unhash_delegation_locked(struct nfs4_delegation *dp)
{
@@ -1214,7 +1219,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
lockdep_assert_held(&state_lock);
- if (list_empty(&dp->dl_perfile))
+ if (!delegation_hashed(dp))
return false;
dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
@@ -4598,7 +4603,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
* queued for a lease break. Don't queue it again.
*/
spin_lock(&state_lock);
- if (dp->dl_time == 0) {
+ if (delegation_hashed(dp) && dp->dl_time == 0) {
dp->dl_time = ktime_get_boottime_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index af8531c3854a..51a49e0cfe37 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1521,12 +1521,9 @@ static int __init init_nfsd(void)
int retval;
printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
- retval = register_cld_notifier();
- if (retval)
- return retval;
retval = nfsd4_init_slabs();
if (retval)
- goto out_unregister_notifier;
+ return retval;
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
@@ -1545,9 +1542,14 @@ static int __init init_nfsd(void)
goto out_free_exports;
retval = register_pernet_subsys(&nfsd_net_ops);
if (retval < 0)
+ goto out_free_filesystem;
+ retval = register_cld_notifier();
+ if (retval)
goto out_free_all;
return 0;
out_free_all:
+ unregister_pernet_subsys(&nfsd_net_ops);
+out_free_filesystem:
unregister_filesystem(&nfsd_fs_type);
out_free_exports:
remove_proc_entry("fs/nfs/exports", NULL);
@@ -1561,13 +1563,12 @@ out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
-out_unregister_notifier:
- unregister_cld_notifier();
return retval;
}
static void __exit exit_nfsd(void)
{
+ unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
@@ -1577,7 +1578,6 @@ static void __exit exit_nfsd(void)
nfsd4_free_slabs();
nfsd4_exit_pnfs();
unregister_filesystem(&nfsd_fs_type);
- unregister_cld_notifier();
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index eea5b59b6a6c..de282f3273c5 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -556,17 +556,17 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp)
static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
struct nfsd_readdirres *resp,
- int count)
+ u32 count)
{
struct xdr_buf *buf = &resp->dirlist;
struct xdr_stream *xdr = &resp->xdr;
- count = min_t(u32, count, PAGE_SIZE);
+ count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp));
memset(buf, 0, sizeof(*buf));
/* Reserve room for the NULL ptr & eof flag (-2 words) */
- buf->buflen = count - sizeof(__be32) * 2;
+ buf->buflen = count - XDR_UNIT * 2;
buf->pages = rqstp->rq_next_page;
rqstp->rq_next_page++;
@@ -577,7 +577,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
xdr->page_ptr = buf->pages;
xdr->iov = NULL;
xdr->p = page_address(*buf->pages);
- xdr->end = xdr->p + (PAGE_SIZE >> 2);
+ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
xdr->rqst = NULL;
}
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 040e1cf90528..65ce0e72e7b9 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -35,17 +35,7 @@
void signalfd_cleanup(struct sighand_struct *sighand)
{
- wait_queue_head_t *wqh = &sighand->signalfd_wqh;
- /*
- * The lockless check can race with remove_wait_queue() in progress,
- * but in this case its caller should run under rcu_read_lock() and
- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
- */
- if (likely(!waitqueue_active(wqh)))
- return;
-
- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
- wake_up_poll(wqh, EPOLLHUP | POLLFREE);
+ wake_up_pollfree(&sighand->signalfd_wqh);
}
struct signalfd_ctx {
diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c
index 85ba15a60b13..043e4cb839fa 100644
--- a/fs/smbfs_common/cifs_arc4.c
+++ b/fs/smbfs_common/cifs_arc4.c
@@ -72,16 +72,3 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l
ctx->y = y;
}
EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
-
-static int __init
-init_smbfs_common(void)
-{
- return 0;
-}
-static void __init
-exit_smbfs_common(void)
-{
-}
-
-module_init(init_smbfs_common)
-module_exit(exit_smbfs_common)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 925a621b432e..3616839c5c4b 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -161,6 +161,77 @@ struct tracefs_fs_info {
struct tracefs_mount_opts mount_opts;
};
+static void change_gid(struct dentry *dentry, kgid_t gid)
+{
+ if (!dentry->d_inode)
+ return;
+ dentry->d_inode->i_gid = gid;
+}
+
+/*
+ * Taken from d_walk, but without he need for handling renames.
+ * Nothing can be renamed while walking the list, as tracefs
+ * does not support renames. This is only called when mounting
+ * or remounting the file system, to set all the files to
+ * the given gid.
+ */
+static void set_gid(struct dentry *parent, kgid_t gid)
+{
+ struct dentry *this_parent;
+ struct list_head *next;
+
+ this_parent = parent;
+ spin_lock(&this_parent->d_lock);
+
+ change_gid(this_parent, gid);
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
+ change_gid(dentry, gid);
+
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, _RET_IP_);
+ this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+ goto repeat;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ rcu_read_lock();
+ascend:
+ if (this_parent != parent) {
+ struct dentry *child = this_parent;
+ this_parent = child->d_parent;
+
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /* go into the first sibling still alive */
+ do {
+ next = child->d_child.next;
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+ rcu_read_unlock();
+ goto resume;
+ }
+ rcu_read_unlock();
+ spin_unlock(&this_parent->d_lock);
+ return;
+}
+
static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
{
substring_t args[MAX_OPT_ARGS];
@@ -193,6 +264,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
if (!gid_valid(gid))
return -EINVAL;
opts->gid = gid;
+ set_gid(tracefs_mount->mnt_root, gid);
break;
case Opt_mode:
if (match_octal(&args[0], &option))
@@ -414,6 +486,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_mode = mode;
inode->i_fop = fops ? fops : &tracefs_file_operations;
inode->i_private = data;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
return end_creating(dentry);
@@ -436,6 +510,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
inode->i_op = ops;
inode->i_fop = &simple_dir_operations;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 174cd8950cb6..bc85e045845d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -687,7 +687,8 @@ xfs_ioc_space(
if (bf->l_start > XFS_ISIZE(ip)) {
error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip), 0);
+ bf->l_start - XFS_ISIZE(ip),
+ XFS_BMAPI_PREALLOC);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e21459f9923a..778b57b1f020 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1765,7 +1765,10 @@ static int
xfs_remount_ro(
struct xfs_mount *mp)
{
- int error;
+ struct xfs_icwalk icw = {
+ .icw_flags = XFS_ICWALK_FLAG_SYNC,
+ };
+ int error;
/*
* Cancel background eofb scanning so it cannot race with the final
@@ -1773,8 +1776,13 @@ xfs_remount_ro(
*/
xfs_blockgc_stop(mp);
- /* Get rid of any leftover CoW reservations... */
- error = xfs_blockgc_free_space(mp, NULL);
+ /*
+ * Clear out all remaining COW staging extents and speculative post-EOF
+ * preallocations so that we don't leave inodes requiring inactivation
+ * cleanups during reclaim on a read-only mount. We must process every
+ * cached inode, so this requires a synchronous cache scan.
+ */
+ error = xfs_blockgc_free_space(mp, &icw);
if (error) {
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return error;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 259ee2bda492..b76dfb310ab6 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1787,5 +1787,6 @@ static void __exit zonefs_exit(void)
MODULE_AUTHOR("Damien Le Moal");
MODULE_DESCRIPTION("Zone file system for zoned block devices");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_FS("zonefs");
module_init(zonefs_init);
module_exit(zonefs_exit);