summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c18
-rw-r--r--fs/btrfs/inode.c73
-rw-r--r--fs/btrfs/ioctl.c14
-rw-r--r--fs/btrfs/qgroup.c6
-rw-r--r--fs/btrfs/relocation.c51
-rw-r--r--fs/btrfs/root-tree.c10
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/buffer.c8
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/internal.h2
-rw-r--r--fs/io-wq.c12
-rw-r--r--fs/io_uring.c84
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/namei.c90
-rw-r--r--fs/nfs/nfstrace.h2
-rw-r--r--fs/pstore/ram.c4
-rw-r--r--fs/pstore/ram_core.c2
18 files changed, 204 insertions, 186 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 497f979018c2..5c794f4b051a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -908,6 +908,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_fid fid = {};
struct inode *inode;
struct dentry *d;
struct key *key;
@@ -951,21 +952,18 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
afs_stat_v(dvnode, n_lookup);
inode = afs_do_lookup(dir, dentry, key);
key_put(key);
- if (inode == ERR_PTR(-ENOENT)) {
+ if (inode == ERR_PTR(-ENOENT))
inode = afs_try_auto_mntpt(dentry, dir);
- } else {
- dentry->d_fsdata =
- (void *)(unsigned long)dvnode->status.data_version;
- }
+
+ if (!IS_ERR_OR_NULL(inode))
+ fid = AFS_FS_I(inode)->fid;
+
d = d_splice_alias(inode, dentry);
if (!IS_ERR_OR_NULL(d)) {
d->d_fsdata = dentry->d_fsdata;
- trace_afs_lookup(dvnode, &d->d_name,
- inode ? AFS_FS_I(inode) : NULL);
+ trace_afs_lookup(dvnode, &d->d_name, &fid);
} else {
- trace_afs_lookup(dvnode, &dentry->d_name,
- IS_ERR_OR_NULL(inode) ? NULL
- : AFS_FS_I(inode));
+ trace_afs_lookup(dvnode, &dentry->d_name, &fid);
}
return d;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5509c41a4f43..c70baafb2a39 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4238,18 +4238,30 @@ out:
}
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct inode *dir, u64 objectid,
- const char *name, int name_len)
+ struct inode *dir, struct dentry *dentry)
{
struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key key;
+ const char *name = dentry->d_name.name;
+ int name_len = dentry->d_name.len;
u64 index;
int ret;
+ u64 objectid;
u64 dir_ino = btrfs_ino(BTRFS_I(dir));
+ if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
+ objectid = inode->root->root_key.objectid;
+ } else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
+ objectid = inode->location.objectid;
+ } else {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -4271,13 +4283,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
- dir_ino, &index, name, name_len);
- if (ret < 0) {
- if (ret != -ENOENT) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
+ /*
+ * This is a placeholder inode for a subvolume we didn't have a
+ * reference to at the time of the snapshot creation. In the meantime
+ * we could have renamed the real subvol link into our snapshot, so
+ * depending on btrfs_del_root_ref to return -ENOENT here is incorret.
+ * Instead simply lookup the dir_index_item for this entry so we can
+ * remove it. Otherwise we know we have a ref to the root and we can
+ * call btrfs_del_root_ref, and it _shouldn't_ fail.
+ */
+ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
di = btrfs_search_dir_index_item(root, path, dir_ino,
name, name_len);
if (IS_ERR_OR_NULL(di)) {
@@ -4292,8 +4307,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
index = key.offset;
+ btrfs_release_path(path);
+ } else {
+ ret = btrfs_del_root_ref(trans, objectid,
+ root->root_key.objectid, dir_ino,
+ &index, name, name_len);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
}
- btrfs_release_path(path);
ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
if (ret) {
@@ -4487,8 +4510,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
- ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
- dentry->d_name.name, dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, dir, dentry);
if (ret) {
err = ret;
btrfs_abort_transaction(trans, ret);
@@ -4583,10 +4605,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
return PTR_ERR(trans);
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- err = btrfs_unlink_subvol(trans, dir,
- BTRFS_I(inode)->location.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
+ err = btrfs_unlink_subvol(trans, dir, dentry);
goto out;
}
@@ -9536,7 +9555,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
u64 old_idx = 0;
u64 new_idx = 0;
- u64 root_objectid;
int ret;
bool root_log_pinned = false;
bool dest_log_pinned = false;
@@ -9642,10 +9660,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
- root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else { /* src is an inode */
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(old_dentry->d_inode),
@@ -9661,10 +9676,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
- root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
} else { /* dest is an inode */
ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
BTRFS_I(new_dentry->d_inode),
@@ -9862,7 +9874,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
u64 index = 0;
- u64 root_objectid;
int ret;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
bool log_pinned = false;
@@ -9970,10 +9981,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BTRFS_I(old_inode), 1);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
- root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
@@ -9992,10 +10000,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode->i_ctime = current_time(new_inode);
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- root_objectid = BTRFS_I(new_inode)->location.objectid;
- ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
BUG_ON(new_inode->i_nlink == 0);
} else {
ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 18e328ce4b54..12ae31e1813e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4252,7 +4252,19 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
&sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
0);
- if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
+ /*
+ * Copy scrub args to user space even if btrfs_scrub_dev() returned an
+ * error. This is important as it allows user space to know how much
+ * progress scrub has done. For example, if scrub is canceled we get
+ * -ECANCELED from btrfs_scrub_dev() and return that error back to user
+ * space. Later user space can inspect the progress from the structure
+ * btrfs_ioctl_scrub_args and resume scrub from where it left off
+ * previously (btrfs-progs does this).
+ * If we fail to copy the btrfs_ioctl_scrub_args structure to user space
+ * then return -EFAULT to signal the structure was not copied or it may
+ * be corrupt and unreliable due to a partial copy.
+ */
+ if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
if (!(sa->flags & BTRFS_SCRUB_READONLY))
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d4282e12f2a6..39fc8c3d3a75 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2423,8 +2423,12 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
u64 nr_old_roots = 0;
int ret = 0;
+ /*
+ * If quotas get disabled meanwhile, the resouces need to be freed and
+ * we can't just exit here.
+ */
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
- return 0;
+ goto out_free;
if (new_roots) {
if (!maybe_fs_roots(new_roots))
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index c58245797f30..da5abd62db22 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -517,6 +517,34 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,
return 1;
}
+static bool reloc_root_is_dead(struct btrfs_root *root)
+{
+ /*
+ * Pair with set_bit/clear_bit in clean_dirty_subvols and
+ * btrfs_update_reloc_root. We need to see the updated bit before
+ * trying to access reloc_root
+ */
+ smp_rmb();
+ if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
+ return true;
+ return false;
+}
+
+/*
+ * Check if this subvolume tree has valid reloc tree.
+ *
+ * Reloc tree after swap is considered dead, thus not considered as valid.
+ * This is enough for most callers, as they don't distinguish dead reloc root
+ * from no reloc root. But should_ignore_root() below is a special case.
+ */
+static bool have_reloc_root(struct btrfs_root *root)
+{
+ if (reloc_root_is_dead(root))
+ return false;
+ if (!root->reloc_root)
+ return false;
+ return true;
+}
static int should_ignore_root(struct btrfs_root *root)
{
@@ -525,6 +553,10 @@ static int should_ignore_root(struct btrfs_root *root)
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
return 0;
+ /* This root has been merged with its reloc tree, we can ignore it */
+ if (reloc_root_is_dead(root))
+ return 1;
+
reloc_root = root->reloc_root;
if (!reloc_root)
return 0;
@@ -1439,7 +1471,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
* The subvolume has reloc tree but the swap is finished, no need to
* create/update the dead reloc tree
*/
- if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
+ if (reloc_root_is_dead(root))
return 0;
if (root->reloc_root) {
@@ -1478,8 +1510,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root_item *root_item;
int ret;
- if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state) ||
- !root->reloc_root)
+ if (!have_reloc_root(root))
goto out;
reloc_root = root->reloc_root;
@@ -1489,6 +1520,11 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
if (fs_info->reloc_ctl->merge_reloc_tree &&
btrfs_root_refs(root_item) == 0) {
set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
+ /*
+ * Mark the tree as dead before we change reloc_root so
+ * have_reloc_root will not touch it from now on.
+ */
+ smp_wmb();
__del_reloc_root(reloc_root);
}
@@ -2201,6 +2237,11 @@ static int clean_dirty_subvols(struct reloc_control *rc)
if (ret2 < 0 && !ret)
ret = ret2;
}
+ /*
+ * Need barrier to ensure clear_bit() only happens after
+ * root->reloc_root = NULL. Pairs with have_reloc_root.
+ */
+ smp_wmb();
clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
btrfs_put_fs_root(root);
} else {
@@ -4718,7 +4759,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
struct btrfs_root *root = pending->root;
struct reloc_control *rc = root->fs_info->reloc_ctl;
- if (!root->reloc_root || !rc)
+ if (!rc || !have_reloc_root(root))
return;
if (!rc->merge_reloc_tree)
@@ -4752,7 +4793,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct reloc_control *rc = root->fs_info->reloc_ctl;
int ret;
- if (!root->reloc_root || !rc)
+ if (!rc || !have_reloc_root(root))
return 0;
rc = root->fs_info->reloc_ctl;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 3b17b647d002..612411c74550 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -376,11 +376,13 @@ again:
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
-
- WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
- WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
ptr = (unsigned long)(ref + 1);
- WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
+ if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
+ (btrfs_root_ref_name_len(leaf, ref) != name_len) ||
+ memcmp_extent_buffer(leaf, name, ptr, name_len)) {
+ err = -ENOENT;
+ goto out;
+ }
*sequence = btrfs_root_ref_sequence(leaf, ref);
ret = btrfs_del_item(trans, tree_root, path);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6d3f08bfff3..9b78e720c697 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3881,7 +3881,11 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
}
}
- num_devices = btrfs_num_devices(fs_info);
+ /*
+ * rw_devices will not change at the moment, device add/delete/replace
+ * are excluded by EXCL_OP
+ */
+ num_devices = fs_info->fs_devices->rw_devices;
/*
* SINGLE profile on-disk has no profile bit, but in-memory we have a
diff --git a/fs/buffer.c b/fs/buffer.c
index e94a6619464c..18a87ec8a465 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3031,7 +3031,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
* errors, this only handles the "we need to be able to
* do IO at the final sector" case.
*/
-void guard_bio_eod(int op, struct bio *bio)
+void guard_bio_eod(struct bio *bio)
{
sector_t maxsector;
struct hd_struct *part;
@@ -3095,15 +3095,15 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
- /* Take care of bh's that straddle the end of the device */
- guard_bio_eod(op, bio);
-
if (buffer_meta(bh))
op_flags |= REQ_META;
if (buffer_prio(bh))
op_flags |= REQ_PRIO;
bio_set_op_attrs(bio, op, op_flags);
+ /* Take care of bh's that straddle the end of the device */
+ guard_bio_eod(bio);
+
if (wbc) {
wbc_init_bio(wbc, bio);
wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 00dfe17871ac..c5e6eff5a381 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -352,7 +352,7 @@ static struct kobject *cdev_get(struct cdev *p)
if (owner && !try_module_get(owner))
return NULL;
- kobj = kobject_get(&p->kobj);
+ kobj = kobject_get_unless_zero(&p->kobj);
if (!kobj)
module_put(owner);
return kobj;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a63d779eac10..ce715380143c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -882,6 +882,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
struct fuse_args_pages *ap = &ia->ap;
loff_t pos = page_offset(ap->pages[0]);
size_t count = ap->num_pages << PAGE_SHIFT;
+ ssize_t res;
int err;
ap->args.out_pages = true;
@@ -896,7 +897,8 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
if (!err)
return;
} else {
- err = fuse_simple_request(fc, &ap->args);
+ res = fuse_simple_request(fc, &ap->args);
+ err = res < 0 ? res : 0;
}
fuse_readpages_end(fc, &ap->args, err);
}
diff --git a/fs/internal.h b/fs/internal.h
index 4a7da1df573d..e3fa69544b66 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -38,7 +38,7 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
/*
* buffer.c
*/
-extern void guard_bio_eod(int rw, struct bio *bio);
+extern void guard_bio_eod(struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 541c8a3e0bbb..5147d2213b01 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -445,10 +445,14 @@ next:
task_unlock(current);
}
if ((work->flags & IO_WQ_WORK_NEEDS_USER) && !worker->mm &&
- wq->mm && mmget_not_zero(wq->mm)) {
- use_mm(wq->mm);
- set_fs(USER_DS);
- worker->mm = wq->mm;
+ wq->mm) {
+ if (mmget_not_zero(wq->mm)) {
+ use_mm(wq->mm);
+ set_fs(USER_DS);
+ worker->mm = wq->mm;
+ } else {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ }
}
if (!worker->creds)
worker->creds = override_creds(wq->creds);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 562e3a1a1bf9..187dd94fd6b1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1786,6 +1786,9 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
struct iovec *iovec, struct iovec *fast_iov,
struct iov_iter *iter)
{
+ if (req->opcode == IORING_OP_READ_FIXED ||
+ req->opcode == IORING_OP_WRITE_FIXED)
+ return 0;
if (!req->io && io_alloc_async_ctx(req))
return -ENOMEM;
@@ -1840,6 +1843,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
if (!force_nonblock)
req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
+ req->result = 0;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1863,18 +1867,6 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
else
ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
- /*
- * In case of a short read, punt to async. This can happen
- * if we have data partially cached. Alternatively we can
- * return the short read, in which case the application will
- * need to issue another SQE and wait for it. That SQE will
- * need async punt anyway, so it's more efficient to do it
- * here.
- */
- if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
- (req->flags & REQ_F_ISREG) &&
- ret2 > 0 && ret2 < io_size)
- ret2 = -EAGAIN;
/* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
kiocb_done(kiocb, ret2, nxt, req->in_async);
@@ -1939,6 +1931,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
if (!force_nonblock)
req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
+ req->result = 0;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -2046,6 +2039,28 @@ static bool io_req_cancelled(struct io_kiocb *req)
return false;
}
+static void io_link_work_cb(struct io_wq_work **workptr)
+{
+ struct io_wq_work *work = *workptr;
+ struct io_kiocb *link = work->data;
+
+ io_queue_linked_timeout(link);
+ work->func = io_wq_submit_work;
+}
+
+static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
+{
+ struct io_kiocb *link;
+
+ io_prep_async_work(nxt, &link);
+ *workptr = &nxt->work;
+ if (link) {
+ nxt->work.flags |= IO_WQ_WORK_CB;
+ nxt->work.func = io_link_work_cb;
+ nxt->work.data = link;
+ }
+}
+
static void io_fsync_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
@@ -2064,7 +2079,7 @@ static void io_fsync_finish(struct io_wq_work **workptr)
io_cqring_add_event(req, ret);
io_put_req_find_next(req, &nxt);
if (nxt)
- *workptr = &nxt->work;
+ io_wq_assign_next(workptr, nxt);
}
static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
@@ -2120,7 +2135,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr)
io_cqring_add_event(req, ret);
io_put_req_find_next(req, &nxt);
if (nxt)
- *workptr = &nxt->work;
+ io_wq_assign_next(workptr, nxt);
}
static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
@@ -2386,7 +2401,7 @@ static void io_accept_finish(struct io_wq_work **workptr)
return;
__io_accept(req, &nxt, false);
if (nxt)
- *workptr = &nxt->work;
+ io_wq_assign_next(workptr, nxt);
}
#endif
@@ -2617,7 +2632,7 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
req_set_fail_links(req);
io_put_req_find_next(req, &nxt);
if (nxt)
- *workptr = &nxt->work;
+ io_wq_assign_next(workptr, nxt);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -3271,24 +3286,24 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return ret;
if (ctx->flags & IORING_SETUP_IOPOLL) {
+ const bool in_async = io_wq_current_is_worker();
+
if (req->result == -EAGAIN)
return -EAGAIN;
+ /* workqueue context doesn't hold uring_lock, grab it now */
+ if (in_async)
+ mutex_lock(&ctx->uring_lock);
+
io_iopoll_req_issued(req);
+
+ if (in_async)
+ mutex_unlock(&ctx->uring_lock);
}
return 0;
}
-static void io_link_work_cb(struct io_wq_work **workptr)
-{
- struct io_wq_work *work = *workptr;
- struct io_kiocb *link = work->data;
-
- io_queue_linked_timeout(link);
- work->func = io_wq_submit_work;
-}
-
static void io_wq_submit_work(struct io_wq_work **workptr)
{
struct io_wq_work *work = *workptr;
@@ -3325,17 +3340,8 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
}
/* if a dependent link is ready, pass it back */
- if (!ret && nxt) {
- struct io_kiocb *link;
-
- io_prep_async_work(nxt, &link);
- *workptr = &nxt->work;
- if (link) {
- nxt->work.flags |= IO_WQ_WORK_CB;
- nxt->work.func = io_link_work_cb;
- nxt->work.data = link;
- }
- }
+ if (!ret && nxt)
+ io_wq_assign_next(workptr, nxt);
}
static bool io_req_op_valid(int op)
@@ -5153,6 +5159,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
} else if (to_submit) {
struct mm_struct *cur_mm;
+ if (current->mm != ctx->sqo_mm ||
+ current_cred() != ctx->creds) {
+ ret = -EPERM;
+ goto out;
+ }
+
to_submit = min(to_submit, ctx->sq_entries);
mutex_lock(&ctx->uring_lock);
/* already have mm, so io_submit_sqes() won't try to grab it */
diff --git a/fs/mpage.c b/fs/mpage.c
index a63620cdb73a..ccba3c4c4479 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -62,7 +62,7 @@ static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
{
bio->bi_end_io = mpage_end_io;
bio_set_op_attrs(bio, op, op_flags);
- guard_bio_eod(op, bio);
+ guard_bio_eod(bio);
submit_bio(bio);
return NULL;
}
diff --git a/fs/namei.c b/fs/namei.c
index d6c91d1e88cb..d2720dc71d0e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1232,6 +1232,7 @@ static int follow_managed(struct path *path, struct nameidata *nd)
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path, false);
+ flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0)
break;
}
@@ -1649,17 +1650,15 @@ again:
if (IS_ERR(dentry))
return dentry;
if (unlikely(!d_in_lookup(dentry))) {
- if (!(flags & LOOKUP_NO_REVAL)) {
- int error = d_revalidate(dentry, flags);
- if (unlikely(error <= 0)) {
- if (!error) {
- d_invalidate(dentry);
- dput(dentry);
- goto again;
- }
+ int error = d_revalidate(dentry, flags);
+ if (unlikely(error <= 0)) {
+ if (!error) {
+ d_invalidate(dentry);
dput(dentry);
- dentry = ERR_PTR(error);
+ goto again;
}
+ dput(dentry);
+ dentry = ERR_PTR(error);
}
} else {
old = inode->i_op->lookup(inode, dentry, flags);
@@ -2618,72 +2617,6 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
EXPORT_SYMBOL(user_path_at_empty);
/**
- * mountpoint_last - look up last component for umount
- * @nd: pathwalk nameidata - currently pointing at parent directory of "last"
- *
- * This is a special lookup_last function just for umount. In this case, we
- * need to resolve the path without doing any revalidation.
- *
- * The nameidata should be the result of doing a LOOKUP_PARENT pathwalk. Since
- * mountpoints are always pinned in the dcache, their ancestors are too. Thus,
- * in almost all cases, this lookup will be served out of the dcache. The only
- * cases where it won't are if nd->last refers to a symlink or the path is
- * bogus and it doesn't exist.
- *
- * Returns:
- * -error: if there was an error during lookup. This includes -ENOENT if the
- * lookup found a negative dentry.
- *
- * 0: if we successfully resolved nd->last and found it to not to be a
- * symlink that needs to be followed.
- *
- * 1: if we successfully resolved nd->last and found it to be a symlink
- * that needs to be followed.
- */
-static int
-mountpoint_last(struct nameidata *nd)
-{
- int error = 0;
- struct dentry *dir = nd->path.dentry;
- struct path path;
-
- /* If we're in rcuwalk, drop out of it to handle last component */
- if (nd->flags & LOOKUP_RCU) {
- if (unlazy_walk(nd))
- return -ECHILD;
- }
-
- nd->flags &= ~LOOKUP_PARENT;
-
- if (unlikely(nd->last_type != LAST_NORM)) {
- error = handle_dots(nd, nd->last_type);
- if (error)
- return error;
- path.dentry = dget(nd->path.dentry);
- } else {
- path.dentry = d_lookup(dir, &nd->last);
- if (!path.dentry) {
- /*
- * No cached dentry. Mounted dentries are pinned in the
- * cache, so that means that this dentry is probably
- * a symlink or the path doesn't actually point
- * to a mounted dentry.
- */
- path.dentry = lookup_slow(&nd->last, dir,
- nd->flags | LOOKUP_NO_REVAL);
- if (IS_ERR(path.dentry))
- return PTR_ERR(path.dentry);
- }
- }
- if (d_flags_negative(smp_load_acquire(&path.dentry->d_flags))) {
- dput(path.dentry);
- return -ENOENT;
- }
- path.mnt = nd->path.mnt;
- return step_into(nd, &path, 0, d_backing_inode(path.dentry), 0);
-}
-
-/**
* path_mountpoint - look up a path to be umounted
* @nd: lookup context
* @flags: lookup flags
@@ -2699,14 +2632,17 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
int err;
while (!(err = link_path_walk(s, nd)) &&
- (err = mountpoint_last(nd)) > 0) {
+ (err = lookup_last(nd)) > 0) {
s = trailing_symlink(nd);
}
+ if (!err && (nd->flags & LOOKUP_RCU))
+ err = unlazy_walk(nd);
+ if (!err)
+ err = handle_lookup_down(nd);
if (!err) {
*path = nd->path;
nd->path.mnt = NULL;
nd->path.dentry = NULL;
- follow_mount(path);
}
terminate_walk(nd);
return err;
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index f64a33d2a1d1..2a82dcce5fc1 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -206,7 +206,6 @@ TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT);
TRACE_DEFINE_ENUM(LOOKUP_PARENT);
TRACE_DEFINE_ENUM(LOOKUP_REVAL);
TRACE_DEFINE_ENUM(LOOKUP_RCU);
-TRACE_DEFINE_ENUM(LOOKUP_NO_REVAL);
TRACE_DEFINE_ENUM(LOOKUP_OPEN);
TRACE_DEFINE_ENUM(LOOKUP_CREATE);
TRACE_DEFINE_ENUM(LOOKUP_EXCL);
@@ -224,7 +223,6 @@ TRACE_DEFINE_ENUM(LOOKUP_DOWN);
{ LOOKUP_PARENT, "PARENT" }, \
{ LOOKUP_REVAL, "REVAL" }, \
{ LOOKUP_RCU, "RCU" }, \
- { LOOKUP_NO_REVAL, "NO_REVAL" }, \
{ LOOKUP_OPEN, "OPEN" }, \
{ LOOKUP_CREATE, "CREATE" }, \
{ LOOKUP_EXCL, "EXCL" }, \
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 487ee39b438a..013486b5125e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -583,12 +583,12 @@ static int ramoops_init_przs(const char *name,
prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig,
&cxt->ecc_info,
cxt->memtype, flags, label);
+ kfree(label);
if (IS_ERR(prz_ar[i])) {
err = PTR_ERR(prz_ar[i]);
dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
name, record_size,
(unsigned long long)*paddr, err);
- kfree(label);
while (i > 0) {
i--;
@@ -629,12 +629,12 @@ static int ramoops_init_prz(const char *name,
label = kasprintf(GFP_KERNEL, "ramoops:%s", name);
*prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
cxt->memtype, PRZ_FLAG_ZAP_OLD, label);
+ kfree(label);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
name, sz, (unsigned long long)*paddr, err);
- kfree(label);
return err;
}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 8823f65888f0..1f4d8c06f9be 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -574,7 +574,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
/* Initialize general buffer state. */
raw_spin_lock_init(&prz->buffer_lock);
prz->flags = flags;
- prz->label = label;
+ prz->label = kstrdup(label, GFP_KERNEL);
ret = persistent_ram_buffer_map(start, size, prz, memtype);
if (ret)