summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/device-mapper/verity.rst4
-rw-r--r--MAINTAINERS8
-rw-r--r--drivers/char/hw_random/bcm2835-rng.c2
-rw-r--r--drivers/md/dm-bufio.c13
-rw-r--r--drivers/md/dm-cache-policy.h2
-rw-r--r--drivers/md/dm-clone-target.c2
-rw-r--r--drivers/md/dm-ioctl.c78
-rw-r--r--drivers/md/dm-raid.c4
-rw-r--r--drivers/md/dm-rq.c4
-rw-r--r--drivers/md/dm-stats.c2
-rw-r--r--drivers/md/dm-table.c78
-rw-r--r--drivers/md/dm-verity-target.c18
-rw-r--r--drivers/md/dm.c9
-rw-r--r--fs/btrfs/backref.c84
-rw-r--r--fs/btrfs/backref.h1
-rw-r--r--fs/btrfs/block-group.c2
-rw-r--r--fs/btrfs/extent-io-tree.c15
-rw-r--r--fs/btrfs/send.c5
-rw-r--r--fs/btrfs/send.h5
-rw-r--r--fs/erofs/fscache.c3
-rw-r--r--fs/erofs/zdata.c17
-rw-r--r--fs/erofs/zdata.h6
-rw-r--r--fs/erofs/zmap.c22
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--kernel/bpf/cgroup_iter.c2
-rw-r--r--kernel/cgroup/cgroup.c99
26 files changed, 308 insertions, 178 deletions
diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst
index 1a6b91368e59..a65c1602cb23 100644
--- a/Documentation/admin-guide/device-mapper/verity.rst
+++ b/Documentation/admin-guide/device-mapper/verity.rst
@@ -141,6 +141,10 @@ root_hash_sig_key_desc <key_description>
also gain new certificates at run time if they are signed by a certificate
already in the secondary trusted keyring.
+try_verify_in_tasklet
+ If verity hashes are in cache, verify data blocks in a kernel tasklet instead
+ of a workqueue. This option can reduce IO latency.
+
Theory of operation
===================
diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f18502372..ca063a504026 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4459,13 +4459,15 @@ M: Josef Bacik <josef@toxicpanda.com>
M: David Sterba <dsterba@suse.com>
L: linux-btrfs@vger.kernel.org
S: Maintained
-W: http://btrfs.wiki.kernel.org/
-Q: http://patchwork.kernel.org/project/linux-btrfs/list/
+W: https://btrfs.readthedocs.io
+W: https://btrfs.wiki.kernel.org/
+Q: https://patchwork.kernel.org/project/linux-btrfs/list/
C: irc://irc.libera.chat/btrfs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
F: Documentation/filesystems/btrfs.rst
F: fs/btrfs/
F: include/linux/btrfs*
+F: include/trace/events/btrfs.h
F: include/uapi/linux/btrfs*
BTTV VIDEO4LINUX DRIVER
@@ -5266,6 +5268,7 @@ F: tools/testing/selftests/cgroup/
CONTROL GROUP - BLOCK IO CONTROLLER (BLKIO)
M: Tejun Heo <tj@kernel.org>
+M: Josef Bacik <josef@toxicpanda.com>
M: Jens Axboe <axboe@kernel.dk>
L: cgroups@vger.kernel.org
L: linux-block@vger.kernel.org
@@ -5273,6 +5276,7 @@ T: git git://git.kernel.dk/linux-block
F: Documentation/admin-guide/cgroup-v1/blkio-controller.rst
F: block/bfq-cgroup.c
F: block/blk-cgroup.c
+F: block/blk-iocost.c
F: block/blk-iolatency.c
F: block/blk-throttle.c
F: include/linux/blk-cgroup.h
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index e7dd457e9b22..e98fcac578d6 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -71,7 +71,7 @@ static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max,
while ((rng_readl(priv, RNG_STATUS) >> 24) == 0) {
if (!wait)
return 0;
- cpu_relax();
+ hwrng_msleep(rng, 1000);
}
num_words = rng_readl(priv, RNG_STATUS) >> 24;
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 09c7ed2650ca..9c5ef818ca36 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -795,7 +795,8 @@ static void __make_buffer_clean(struct dm_buffer *b)
{
BUG_ON(b->hold_count);
- if (!b->state) /* fast case */
+ /* smp_load_acquire() pairs with read_endio()'s smp_mb__before_atomic() */
+ if (!smp_load_acquire(&b->state)) /* fast case */
return;
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
@@ -816,7 +817,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
BUG_ON(test_bit(B_DIRTY, &b->state));
if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep &&
- unlikely(test_bit(B_READING, &b->state)))
+ unlikely(test_bit_acquire(B_READING, &b->state)))
continue;
if (!b->hold_count) {
@@ -1058,7 +1059,7 @@ found_buffer:
* If the user called both dm_bufio_prefetch and dm_bufio_get on
* the same buffer, it would deadlock if we waited.
*/
- if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state)))
+ if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state)))
return NULL;
b->hold_count++;
@@ -1218,7 +1219,7 @@ void dm_bufio_release(struct dm_buffer *b)
* invalid buffer.
*/
if ((b->read_error || b->write_error) &&
- !test_bit(B_READING, &b->state) &&
+ !test_bit_acquire(B_READING, &b->state) &&
!test_bit(B_WRITING, &b->state) &&
!test_bit(B_DIRTY, &b->state)) {
__unlink_buffer(b);
@@ -1479,7 +1480,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_release_move);
static void forget_buffer_locked(struct dm_buffer *b)
{
- if (likely(!b->hold_count) && likely(!b->state)) {
+ if (likely(!b->hold_count) && likely(!smp_load_acquire(&b->state))) {
__unlink_buffer(b);
__free_buffer_wake(b);
}
@@ -1639,7 +1640,7 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
{
if (!(gfp & __GFP_FS) ||
(static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
- if (test_bit(B_READING, &b->state) ||
+ if (test_bit_acquire(B_READING, &b->state) ||
test_bit(B_WRITING, &b->state) ||
test_bit(B_DIRTY, &b->state))
return false;
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
index c05fc3436cef..06eb31af626f 100644
--- a/drivers/md/dm-cache-policy.h
+++ b/drivers/md/dm-cache-policy.h
@@ -166,7 +166,7 @@ struct dm_cache_policy_type {
struct dm_cache_policy_type *real;
/*
- * Policies may store a hint for each each cache block.
+ * Policies may store a hint for each cache block.
* Currently the size of this hint must be 0 or 4 bytes but we
* expect to relax this in future.
*/
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 811b0a5379d0..2f1cc66d2641 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2035,7 +2035,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
reason = "max discard sectors smaller than a region";
if (reason) {
- DMWARN("Destination device (%pd) %s: Disabling discard passdown.",
+ DMWARN("Destination device (%pg) %s: Disabling discard passdown.",
dest_dev, reason);
clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 98976aaa9db9..6b3f867d0b70 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -434,10 +434,10 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
hc = __get_name_cell(new);
if (hc) {
- DMWARN("Unable to change %s on mapped device %s to one that "
- "already exists: %s",
- change_uuid ? "uuid" : "name",
- param->name, new);
+ DMERR("Unable to change %s on mapped device %s to one that "
+ "already exists: %s",
+ change_uuid ? "uuid" : "name",
+ param->name, new);
dm_put(hc->md);
up_write(&_hash_lock);
kfree(new_data);
@@ -449,8 +449,8 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
*/
hc = __get_name_cell(param->name);
if (!hc) {
- DMWARN("Unable to rename non-existent device, %s to %s%s",
- param->name, change_uuid ? "uuid " : "", new);
+ DMERR("Unable to rename non-existent device, %s to %s%s",
+ param->name, change_uuid ? "uuid " : "", new);
up_write(&_hash_lock);
kfree(new_data);
return ERR_PTR(-ENXIO);
@@ -460,9 +460,9 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
* Does this device already have a uuid?
*/
if (change_uuid && hc->uuid) {
- DMWARN("Unable to change uuid of mapped device %s to %s "
- "because uuid is already set to %s",
- param->name, new, hc->uuid);
+ DMERR("Unable to change uuid of mapped device %s to %s "
+ "because uuid is already set to %s",
+ param->name, new, hc->uuid);
dm_put(hc->md);
up_write(&_hash_lock);
kfree(new_data);
@@ -750,7 +750,7 @@ static int get_target_version(struct file *filp, struct dm_ioctl *param, size_t
static int check_name(const char *name)
{
if (strchr(name, '/')) {
- DMWARN("invalid device name");
+ DMERR("invalid device name");
return -EINVAL;
}
@@ -773,7 +773,7 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *src
down_read(&_hash_lock);
hc = dm_get_mdptr(md);
if (!hc || hc->md != md) {
- DMWARN("device has been removed from the dev hash table.");
+ DMERR("device has been removed from the dev hash table.");
goto out;
}
@@ -1026,7 +1026,7 @@ static int dev_rename(struct file *filp, struct dm_ioctl *param, size_t param_si
if (new_data < param->data ||
invalid_str(new_data, (void *) param + param_size) || !*new_data ||
strlen(new_data) > (change_uuid ? DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) {
- DMWARN("Invalid new mapped device name or uuid string supplied.");
+ DMERR("Invalid new mapped device name or uuid string supplied.");
return -EINVAL;
}
@@ -1061,7 +1061,7 @@ static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t pa
if (geostr < param->data ||
invalid_str(geostr, (void *) param + param_size)) {
- DMWARN("Invalid geometry supplied.");
+ DMERR("Invalid geometry supplied.");
goto out;
}
@@ -1069,13 +1069,13 @@ static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t pa
indata + 1, indata + 2, indata + 3, &dummy);
if (x != 4) {
- DMWARN("Unable to interpret geometry settings.");
+ DMERR("Unable to interpret geometry settings.");
goto out;
}
if (indata[0] > 65535 || indata[1] > 255 ||
indata[2] > 255 || indata[3] > ULONG_MAX) {
- DMWARN("Geometry exceeds range limits.");
+ DMERR("Geometry exceeds range limits.");
goto out;
}
@@ -1387,7 +1387,7 @@ static int populate_table(struct dm_table *table,
char *target_params;
if (!param->target_count) {
- DMWARN("populate_table: no targets specified");
+ DMERR("populate_table: no targets specified");
return -EINVAL;
}
@@ -1395,7 +1395,7 @@ static int populate_table(struct dm_table *table,
r = next_target(spec, next, end, &spec, &target_params);
if (r) {
- DMWARN("unable to find target");
+ DMERR("unable to find target");
return r;
}
@@ -1404,7 +1404,7 @@ static int populate_table(struct dm_table *table,
(sector_t) spec->length,
target_params);
if (r) {
- DMWARN("error adding target to table");
+ DMERR("error adding target to table");
return r;
}
@@ -1451,8 +1451,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
if (immutable_target_type &&
(immutable_target_type != dm_table_get_immutable_target_type(t)) &&
!dm_table_get_wildcard_target(t)) {
- DMWARN("can't replace immutable target type %s",
- immutable_target_type->name);
+ DMERR("can't replace immutable target type %s",
+ immutable_target_type->name);
r = -EINVAL;
goto err_unlock_md_type;
}
@@ -1461,12 +1461,12 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
- DMWARN("unable to set up device queue for new table.");
+ DMERR("unable to set up device queue for new table.");
goto err_unlock_md_type;
}
} else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) {
- DMWARN("can't change device type (old=%u vs new=%u) after initial table load.",
- dm_get_md_type(md), dm_table_get_type(t));
+ DMERR("can't change device type (old=%u vs new=%u) after initial table load.",
+ dm_get_md_type(md), dm_table_get_type(t));
r = -EINVAL;
goto err_unlock_md_type;
}
@@ -1477,7 +1477,7 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
down_write(&_hash_lock);
hc = dm_get_mdptr(md);
if (!hc || hc->md != md) {
- DMWARN("device has been removed from the dev hash table.");
+ DMERR("device has been removed from the dev hash table.");
up_write(&_hash_lock);
r = -ENXIO;
goto err_destroy_table;
@@ -1686,19 +1686,19 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
if (tmsg < (struct dm_target_msg *) param->data ||
invalid_str(tmsg->message, (void *) param + param_size)) {
- DMWARN("Invalid target message parameters.");
+ DMERR("Invalid target message parameters.");
r = -EINVAL;
goto out;
}
r = dm_split_args(&argc, &argv, tmsg->message);
if (r) {
- DMWARN("Failed to split target message parameters");
+ DMERR("Failed to split target message parameters");
goto out;
}
if (!argc) {
- DMWARN("Empty message received.");
+ DMERR("Empty message received.");
r = -EINVAL;
goto out_argv;
}
@@ -1718,12 +1718,12 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
ti = dm_table_find_target(table, tmsg->sector);
if (!ti) {
- DMWARN("Target message sector outside device.");
+ DMERR("Target message sector outside device.");
r = -EINVAL;
} else if (ti->type->message)
r = ti->type->message(ti, argc, argv, result, maxlen);
else {
- DMWARN("Target type does not support messages");
+ DMERR("Target type does not support messages");
r = -EINVAL;
}
@@ -1814,11 +1814,11 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
if ((DM_VERSION_MAJOR != version[0]) ||
(DM_VERSION_MINOR < version[1])) {
- DMWARN("ioctl interface mismatch: "
- "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
- DM_VERSION_MAJOR, DM_VERSION_MINOR,
- DM_VERSION_PATCHLEVEL,
- version[0], version[1], version[2], cmd);
+ DMERR("ioctl interface mismatch: "
+ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
+ DM_VERSION_MAJOR, DM_VERSION_MINOR,
+ DM_VERSION_PATCHLEVEL,
+ version[0], version[1], version[2], cmd);
r = -EINVAL;
}
@@ -1927,11 +1927,11 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
if (cmd == DM_DEV_CREATE_CMD) {
if (!*param->name) {
- DMWARN("name not supplied when creating device");
+ DMERR("name not supplied when creating device");
return -EINVAL;
}
} else if (*param->uuid && *param->name) {
- DMWARN("only supply one of name or uuid, cmd(%u)", cmd);
+ DMERR("only supply one of name or uuid, cmd(%u)", cmd);
return -EINVAL;
}
@@ -1978,7 +1978,7 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us
fn = lookup_ioctl(cmd, &ioctl_flags);
if (!fn) {
- DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
+ DMERR("dm_ctl_ioctl: unknown command 0x%x", command);
return -ENOTTY;
}
@@ -2203,7 +2203,7 @@ int __init dm_early_create(struct dm_ioctl *dmi,
(sector_t) spec_array[i]->length,
target_params_array[i]);
if (r) {
- DMWARN("error adding target to table");
+ DMERR("error adding target to table");
goto err_destroy_table;
}
}
@@ -2216,7 +2216,7 @@ int __init dm_early_create(struct dm_ioctl *dmi,
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
- DMWARN("unable to set up device queue for new table.");
+ DMERR("unable to set up device queue for new table.");
goto err_destroy_table;
}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index c640be453313..54263679a7b1 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2529,7 +2529,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
* of the "sync" directive.
*
* With reshaping capability added, we must ensure that
- * that the "sync" directive is disallowed during the reshape.
+ * the "sync" directive is disallowed during the reshape.
*/
if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
continue;
@@ -2590,7 +2590,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
/*
* Adjust data_offset and new_data_offset on all disk members of @rs
- * for out of place reshaping if requested by contructor
+ * for out of place reshaping if requested by constructor
*
* We need free space at the beginning of each raid disk for forward
* and at the end for backward reshapes which userspace has to provide
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 3001b10a3fbf..a41209a43506 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -238,7 +238,7 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped)
dm_requeue_original_request(tio, true);
break;
default:
- DMWARN("unimplemented target endio return value: %d", r);
+ DMCRIT("unimplemented target endio return value: %d", r);
BUG();
}
}
@@ -409,7 +409,7 @@ static int map_request(struct dm_rq_target_io *tio)
dm_kill_unmapped_request(rq, BLK_STS_IOERR);
break;
default:
- DMWARN("unimplemented target map return value: %d", r);
+ DMCRIT("unimplemented target map return value: %d", r);
BUG();
}
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 8326f9fe0e91..f105a71915ab 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -1220,7 +1220,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
return 2; /* this wasn't a stats message */
if (r == -EINVAL)
- DMWARN("Invalid parameters for message %s", argv[0]);
+ DMCRIT("Invalid parameters for message %s", argv[0]);
return r;
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index d8034ff0cb24..078da18bb86d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -234,12 +234,12 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
return 0;
if ((start >= dev_size) || (start + len > dev_size)) {
- DMWARN("%s: %pg too small for target: "
- "start=%llu, len=%llu, dev_size=%llu",
- dm_device_name(ti->table->md), bdev,
- (unsigned long long)start,
- (unsigned long long)len,
- (unsigned long long)dev_size);
+ DMERR("%s: %pg too small for target: "
+ "start=%llu, len=%llu, dev_size=%llu",
+ dm_device_name(ti->table->md), bdev,
+ (unsigned long long)start,
+ (unsigned long long)len,
+ (unsigned long long)dev_size);
return 1;
}
@@ -251,10 +251,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
unsigned int zone_sectors = bdev_zone_sectors(bdev);
if (start & (zone_sectors - 1)) {
- DMWARN("%s: start=%llu not aligned to h/w zone size %u of %pg",
- dm_device_name(ti->table->md),
- (unsigned long long)start,
- zone_sectors, bdev);
+ DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg",
+ dm_device_name(ti->table->md),
+ (unsigned long long)start,
+ zone_sectors, bdev);
return 1;
}
@@ -268,10 +268,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
* the sector range.
*/
if (len & (zone_sectors - 1)) {
- DMWARN("%s: len=%llu not aligned to h/w zone size %u of %pg",
- dm_device_name(ti->table->md),
- (unsigned long long)len,
- zone_sectors, bdev);
+ DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg",
+ dm_device_name(ti->table->md),
+ (unsigned long long)len,
+ zone_sectors, bdev);
return 1;
}
}
@@ -280,20 +280,20 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
return 0;
if (start & (logical_block_size_sectors - 1)) {
- DMWARN("%s: start=%llu not aligned to h/w "
- "logical block size %u of %pg",
- dm_device_name(ti->table->md),
- (unsigned long long)start,
- limits->logical_block_size, bdev);
+ DMERR("%s: start=%llu not aligned to h/w "
+ "logical block size %u of %pg",
+ dm_device_name(ti->table->md),
+ (unsigned long long)start,
+ limits->logical_block_size, bdev);
return 1;
}
if (len & (logical_block_size_sectors - 1)) {
- DMWARN("%s: len=%llu not aligned to h/w "
- "logical block size %u of %pg",
- dm_device_name(ti->table->md),
- (unsigned long long)len,
- limits->logical_block_size, bdev);
+ DMERR("%s: len=%llu not aligned to h/w "
+ "logical block size %u of %pg",
+ dm_device_name(ti->table->md),
+ (unsigned long long)len,
+ limits->logical_block_size, bdev);
return 1;
}
@@ -434,8 +434,8 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d)
}
}
if (!found) {
- DMWARN("%s: device %s not in table devices list",
- dm_device_name(ti->table->md), d->name);
+ DMERR("%s: device %s not in table devices list",
+ dm_device_name(ti->table->md), d->name);
return;
}
if (refcount_dec_and_test(&dd->count)) {
@@ -618,12 +618,12 @@ static int validate_hardware_logical_block_alignment(struct dm_table *t,
}
if (remaining) {
- DMWARN("%s: table line %u (start sect %llu len %llu) "
- "not aligned to h/w logical block size %u",
- dm_device_name(t->md), i,
- (unsigned long long) ti->begin,
- (unsigned long long) ti->len,
- limits->logical_block_size);
+ DMERR("%s: table line %u (start sect %llu len %llu) "
+ "not aligned to h/w logical block size %u",
+ dm_device_name(t->md), i,
+ (unsigned long long) ti->begin,
+ (unsigned long long) ti->len,
+ limits->logical_block_size);
return -EINVAL;
}
@@ -1008,7 +1008,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
struct dm_md_mempools *pools;
if (unlikely(type == DM_TYPE_NONE)) {
- DMWARN("no table type is set, can't allocate mempools");
+ DMERR("no table type is set, can't allocate mempools");
return -EINVAL;
}
@@ -1112,7 +1112,7 @@ static bool integrity_profile_exists(struct gendisk *disk)
* Get a disk whose integrity profile reflects the table's profile.
* Returns NULL if integrity support was inconsistent or unavailable.
*/
-static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t)
+static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t)
{
struct list_head *devices = dm_table_get_devices(t);
struct dm_dev_internal *dd = NULL;
@@ -1185,10 +1185,10 @@ static int dm_table_register_integrity(struct dm_table *t)
* profile the new profile should not conflict.
*/
if (blk_integrity_compare(dm_disk(md), template_disk) < 0) {
- DMWARN("%s: conflict with existing integrity profile: "
- "%s profile mismatch",
- dm_device_name(t->md),
- template_disk->disk_name);
+ DMERR("%s: conflict with existing integrity profile: "
+ "%s profile mismatch",
+ dm_device_name(t->md),
+ template_disk->disk_name);
return 1;
}
@@ -1327,7 +1327,7 @@ static int dm_table_construct_crypto_profile(struct dm_table *t)
if (t->md->queue &&
!blk_crypto_has_capabilities(profile,
t->md->queue->crypto_profile)) {
- DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
+ DMERR("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
dm_destroy_crypto_profile(profile);
return -EINVAL;
}
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 8a00cc42e498..ccf5b852fbf7 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1401,14 +1401,16 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
/* WQ_UNBOUND greatly improves performance when running on ramdisk */
wq_flags = WQ_MEM_RECLAIM | WQ_UNBOUND;
- if (v->use_tasklet) {
- /*
- * Allow verify_wq to preempt softirq since verification in
- * tasklet will fall-back to using it for error handling
- * (or if the bufio cache doesn't have required hashes).
- */
- wq_flags |= WQ_HIGHPRI;
- }
+ /*
+ * Using WQ_HIGHPRI improves throughput and completion latency by
+ * reducing wait times when reading from a dm-verity device.
+ *
+ * Also as required for the "try_verify_in_tasklet" feature: WQ_HIGHPRI
+ * allows verify_wq to preempt softirq since verification in tasklet
+ * will fall back to using it for error handling (or if the bufio cache
+ * doesn't have required hashes).
+ */
+ wq_flags |= WQ_HIGHPRI;
v->verify_wq = alloc_workqueue("kverityd", wq_flags, num_online_cpus());
if (!v->verify_wq) {
ti->error = "Cannot allocate workqueue";
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 60549b65c799..95a1ee3d314e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -864,7 +864,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
if (geo->start > sz) {
- DMWARN("Start sector is beyond the geometry limits.");
+ DMERR("Start sector is beyond the geometry limits.");
return -EINVAL;
}
@@ -1149,7 +1149,7 @@ static void clone_endio(struct bio *bio)
/* The target will handle the io */
return;
default:
- DMWARN("unimplemented target endio return value: %d", r);
+ DMCRIT("unimplemented target endio return value: %d", r);
BUG();
}
}
@@ -1455,7 +1455,7 @@ static void __map_bio(struct bio *clone)
dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
break;
default:
- DMWARN("unimplemented target map return value: %d", r);
+ DMCRIT("unimplemented target map return value: %d", r);
BUG();
}
}
@@ -2005,7 +2005,7 @@ static struct mapped_device *alloc_dev(int minor)
md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
if (!md) {
- DMWARN("unable to allocate device, out of memory.");
+ DMERR("unable to allocate device, out of memory.");
return NULL;
}
@@ -2065,7 +2065,6 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->minors = 1;
md->disk->flags |= GENHD_FL_NO_PART;
md->disk->fops = &dm_blk_dops;
- md->disk->queue = md->queue;
md->disk->private_data = md;
sprintf(md->disk->disk_name, "dm-%d", minor);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index dce3a16996b9..4ec18ceb2f21 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -138,6 +138,7 @@ struct share_check {
u64 root_objectid;
u64 inum;
int share_count;
+ bool have_delayed_delete_refs;
};
static inline int extent_is_shared(struct share_check *sc)
@@ -820,16 +821,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct preftrees *preftrees, struct share_check *sc)
{
struct btrfs_delayed_ref_node *node;
- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct btrfs_key key;
- struct btrfs_key tmp_op_key;
struct rb_node *n;
int count;
int ret = 0;
- if (extent_op && extent_op->update_key)
- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
-
spin_lock(&head->lock);
for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
@@ -855,10 +851,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
case BTRFS_TREE_BLOCK_REF_KEY: {
/* NORMAL INDIRECT METADATA backref */
struct btrfs_delayed_tree_ref *ref;
+ struct btrfs_key *key_ptr = NULL;
+
+ if (head->extent_op && head->extent_op->update_key) {
+ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
+ key_ptr = &key;
+ }
ref = btrfs_delayed_node_to_tree_ref(node);
ret = add_indirect_ref(fs_info, preftrees, ref->root,
- &tmp_op_key, ref->level + 1,
+ key_ptr, ref->level + 1,
node->bytenr, count, sc,
GFP_ATOMIC);
break;
@@ -884,13 +886,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
key.offset = ref->offset;
/*
- * Found a inum that doesn't match our known inum, we
- * know it's shared.
+ * If we have a share check context and a reference for
+ * another inode, we can't exit immediately. This is
+ * because even if this is a BTRFS_ADD_DELAYED_REF
+ * reference we may find next a BTRFS_DROP_DELAYED_REF
+ * which cancels out this ADD reference.
+ *
+ * If this is a DROP reference and there was no previous
+ * ADD reference, then we need to signal that when we
+ * process references from the extent tree (through
+ * add_inline_refs() and add_keyed_refs()), we should
+ * not exit early if we find a reference for another
+ * inode, because one of the delayed DROP references
+ * may cancel that reference in the extent tree.
*/
- if (sc && sc->inum && ref->objectid != sc->inum) {
- ret = BACKREF_FOUND_SHARED;
- goto out;
- }
+ if (sc && count < 0)
+ sc->have_delayed_delete_refs = true;
ret = add_indirect_ref(fs_info, preftrees, ref->root,
&key, 0, node->bytenr, count, sc,
@@ -920,7 +931,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
if (!ret)
ret = extent_is_shared(sc);
-out:
+
spin_unlock(&head->lock);
return ret;
}
@@ -1023,7 +1034,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1033,6 +1045,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
ret = add_indirect_ref(fs_info, preftrees, root,
&key, 0, bytenr, count,
sc, GFP_NOFS);
+
break;
}
default:
@@ -1122,7 +1135,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1522,6 +1536,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache
{
struct btrfs_backref_shared_cache_entry *entry;
+ if (!cache->use_cache)
+ return false;
+
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return false;
@@ -1557,6 +1574,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache
return false;
*is_shared = entry->is_shared;
+ /*
+ * If the node at this level is shared, then all nodes below are also
+ * shared. Currently some of the nodes below may be marked as not shared
+ * because we have just switched from one leaf to another, and switched
+ * also other nodes above the leaf and below the current level, so mark
+ * them as shared.
+ */
+ if (*is_shared) {
+ for (int i = 0; i < level; i++) {
+ cache->entries[i].is_shared = true;
+ cache->entries[i].gen = entry->gen;
+ }
+ }
return true;
}
@@ -1573,6 +1603,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
struct btrfs_backref_shared_cache_entry *entry;
u64 gen;
+ if (!cache->use_cache)
+ return;
+
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return;
@@ -1648,6 +1681,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
.root_objectid = root->root_key.objectid,
.inum = inum,
.share_count = 0,
+ .have_delayed_delete_refs = false,
};
int level;
@@ -1669,6 +1703,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
/* -1 means we are in the bytenr of the data extent. */
level = -1;
ULIST_ITER_INIT(&uiter);
+ cache->use_cache = true;
while (1) {
bool is_shared;
bool cached;
@@ -1698,6 +1733,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
extent_gen > btrfs_root_last_snapshot(&root->root_item))
break;
+ /*
+ * If our data extent was not directly shared (without multiple
+ * reference items), then it might have a single reference item
+ * with a count > 1 for the same offset, which means there are 2
+ * (or more) file extent items that point to the data extent -
+ * this happens when a file extent item needs to be split and
+ * then one item gets moved to another leaf due to a b+tree leaf
+ * split when inserting some item. In this case the file extent
+ * items may be located in different leaves and therefore some
+ * of the leaves may be referenced through shared subtrees while
+ * others are not. Since our extent buffer cache only works for
+ * a single path (by far the most common case and simpler to
+ * deal with), we can not use it if we have multiple leaves
+ * (which implies multiple paths).
+ */
+ if (level == -1 && tmp->nnodes > 1)
+ cache->use_cache = false;
+
if (level >= 0)
store_backref_shared_cache(cache, root, bytenr,
level, false);
@@ -1713,6 +1766,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
break;
}
shared.share_count = 0;
+ shared.have_delayed_delete_refs = false;
cond_resched();
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 52ae6957b414..8e69584d538d 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -29,6 +29,7 @@ struct btrfs_backref_shared_cache {
* a given data extent should never exceed the maximum b+tree height.
*/
struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
+ bool use_cache;
};
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 32c415cfbdfe..deebc8ddbd93 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -774,10 +774,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
- /* REVIEW */
if (wait && caching_ctl)
ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
- /* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 618275af19c4..83cb0378096f 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
int err;
u64 failed_start;
- while (1) {
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
+ cached_state, NULL, GFP_NOFS);
+ while (err == -EEXIST) {
+ if (failed_start != start)
+ clear_extent_bit(tree, start, failed_start - 1,
+ EXTENT_LOCKED, cached_state);
+
+ wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
&failed_start, cached_state, NULL,
GFP_NOFS);
- if (err == -EEXIST) {
- wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
- start = failed_start;
- } else
- break;
- WARN_ON(start > end);
}
return err;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 4ef4167072b8..ec6e1752af2c 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -348,6 +348,7 @@ static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
switch (sctx->proto) {
case 1: return cmd <= BTRFS_SEND_C_MAX_V1;
case 2: return cmd <= BTRFS_SEND_C_MAX_V2;
+ case 3: return cmd <= BTRFS_SEND_C_MAX_V3;
default: return false;
}
}
@@ -6469,7 +6470,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
if (ret < 0)
goto out;
}
- if (sctx->cur_inode_needs_verity) {
+
+ if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY)
+ && sctx->cur_inode_needs_verity) {
ret = process_verity(sctx);
if (ret < 0)
goto out;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 0a4537775e0c..f7585cfa7e52 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -10,7 +10,12 @@
#include <linux/types.h>
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
+/* Conditional support for the upcoming protocol version. */
+#ifdef CONFIG_BTRFS_DEBUG
+#define BTRFS_SEND_STREAM_VERSION 3
+#else
#define BTRFS_SEND_STREAM_VERSION 2
+#endif
/*
* In send stream v1, no command is larger than 64K. In send stream v2, no limit
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 998cd26a1b3b..fe05bc51f9f2 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -590,14 +590,17 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
struct super_block *psb = erofs_pseudo_mnt->mnt_sb;
mutex_lock(&erofs_domain_cookies_lock);
+ spin_lock(&psb->s_inode_list_lock);
list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
ctx = inode->i_private;
if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
continue;
igrab(inode);
+ spin_unlock(&psb->s_inode_list_lock);
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
}
+ spin_unlock(&psb->s_inode_list_lock);
ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 559380a535af..c7f24fc7efd5 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -813,15 +813,14 @@ retry:
++spiltted;
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
-
- if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
- !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
- fe->pcl->length == map->m_llen)
- fe->pcl->partial = false;
if (fe->pcl->length < offset + end - map->m_la) {
fe->pcl->length = offset + end - map->m_la;
fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
}
+ if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+ !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
+ fe->pcl->length == map->m_llen)
+ fe->pcl->partial = false;
next_part:
/* shorten the remaining extent to update progress */
map->m_llen = offset + cur - map->m_la;
@@ -888,15 +887,13 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
unsigned int pgnr;
- struct page *oldpage;
pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
- oldpage = be->decompressed_pages[pgnr];
- be->decompressed_pages[pgnr] = bvec->page;
-
- if (!oldpage)
+ if (!be->decompressed_pages[pgnr]) {
+ be->decompressed_pages[pgnr] = bvec->page;
return;
+ }
}
/* (cold path) one pcluster is requested multiple times */
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index e7f04c4fbb81..d98c95212985 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -126,10 +126,10 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
}
/*
- * bit 31: I/O error occurred on this page
- * bit 0 - 30: remaining parts to complete this page
+ * bit 30: I/O error occurred on this page
+ * bit 0 - 29: remaining parts to complete this page
*/
-#define Z_EROFS_PAGE_EIO (1 << 31)
+#define Z_EROFS_PAGE_EIO (1 << 30)
static inline void z_erofs_onlinepage_init(struct page *page)
{
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 44c27ef39c43..0bb66927e3d0 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -57,8 +57,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8);
- kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
- EROFS_KMAP_ATOMIC);
+ kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
if (IS_ERR(kaddr)) {
err = PTR_ERR(kaddr);
goto out_unlock;
@@ -73,7 +72,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
vi->z_tailextent_headlcn = 0;
- goto unmap_done;
+ goto done;
}
vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
@@ -85,7 +84,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
err = -EOPNOTSUPP;
- goto unmap_done;
+ goto out_put_metabuf;
}
vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
@@ -95,7 +94,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto unmap_done;
+ goto out_put_metabuf;
}
if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
@@ -103,12 +102,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto unmap_done;
+ goto out_put_metabuf;
}
-unmap_done:
- erofs_put_metabuf(&buf);
- if (err)
- goto out_unlock;
if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
struct erofs_map_blocks map = {
@@ -127,7 +122,7 @@ unmap_done:
err = -EFSCORRUPTED;
}
if (err < 0)
- goto out_unlock;
+ goto out_put_metabuf;
}
if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
@@ -141,11 +136,14 @@ unmap_done:
EROFS_GET_BLOCKS_FINDTAIL);
erofs_put_metabuf(&map.buf);
if (err < 0)
- goto out_unlock;
+ goto out_put_metabuf;
}
+done:
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
+out_put_metabuf:
+ erofs_put_metabuf(&buf);
out_unlock:
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f2a9f2274c3b..528bd44b59e2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -106,6 +106,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup *cgroup_get_from_path(const char *path);
struct cgroup *cgroup_get_from_fd(int fd);
+struct cgroup *cgroup_v1v2_get_from_fd(int fd);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index 0d200a993489..9fcf09f2ef00 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -196,7 +196,7 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
return -EINVAL;
if (fd)
- cgrp = cgroup_get_from_fd(fd);
+ cgrp = cgroup_v1v2_get_from_fd(fd);
else if (id)
cgrp = cgroup_get_from_id(id);
else /* walk the entire hierarchy by default. */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 7f486677ab1f..2319946715e0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1392,6 +1392,9 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
}
+/*
+ * Returned cgroup is without refcount but it's valid as long as cset pins it.
+ */
static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
@@ -1403,6 +1406,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
res_cgroup = cset->dfl_cgrp;
} else {
struct cgrp_cset_link *link;
+ lockdep_assert_held(&css_set_lock);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
@@ -1414,6 +1418,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
}
}
+ BUG_ON(!res_cgroup);
return res_cgroup;
}
@@ -1436,23 +1441,36 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
rcu_read_unlock();
- BUG_ON(!res);
return res;
}
+/*
+ * Look up cgroup associated with current task's cgroup namespace on the default
+ * hierarchy.
+ *
+ * Unlike current_cgns_cgroup_from_root(), this doesn't need locks:
+ * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu
+ * pointers.
+ * - css_set_lock is not needed because we just read cset->dfl_cgrp.
+ * - As a bonus, the returned cgrp is pinned along with current, because
+ * current cannot switch cgroup_ns asynchronously.
+ */
+static struct cgroup *current_cgns_cgroup_dfl(void)
+{
+ struct css_set *cset;
+
+ cset = current->nsproxy->cgroup_ns->root_cset;
+ return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
+}
+
/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
- struct cgroup *res = NULL;
-
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);
- res = __cset_cgroup_from_root(cset, root);
-
- BUG_ON(!res);
- return res;
+ return __cset_cgroup_from_root(cset, root);
}
/*
@@ -6191,9 +6209,7 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!cgrp)
return ERR_PTR(-ENOENT);
- spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
- spin_unlock_irq(&css_set_lock);
+ root_cgrp = current_cgns_cgroup_dfl();
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
cgroup_put(cgrp);
return ERR_PTR(-ENOENT);
@@ -6294,16 +6310,42 @@ void cgroup_fork(struct task_struct *child)
INIT_LIST_HEAD(&child->cg_list);
}
-static struct cgroup *cgroup_get_from_file(struct file *f)
+/**
+ * cgroup_v1v2_get_from_file - get a cgroup pointer from a file pointer
+ * @f: file corresponding to cgroup_dir
+ *
+ * Find the cgroup from a file pointer associated with a cgroup directory.
+ * Returns a pointer to the cgroup on success. ERR_PTR is returned if the
+ * cgroup cannot be found.
+ */
+static struct cgroup *cgroup_v1v2_get_from_file(struct file *f)
{
struct cgroup_subsys_state *css;
- struct cgroup *cgrp;
css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
if (IS_ERR(css))
return ERR_CAST(css);
- cgrp = css->cgroup;
+ return css->cgroup;
+}
+
+/**
+ * cgroup_get_from_file - same as cgroup_v1v2_get_from_file, but only supports
+ * cgroup2.
+ * @f: file corresponding to cgroup2_dir
+ */
+static struct cgroup *cgroup_get_from_file(struct file *f)
+{
+ struct cgroup *cgrp = cgroup_v1v2_get_from_file(f);
+
+ if (IS_ERR(cgrp))
+ return ERR_CAST(cgrp);
+
+ if (!cgroup_on_dfl(cgrp)) {
+ cgroup_put(cgrp);
+ return ERR_PTR(-EBADF);
+ }
+
return cgrp;
}
@@ -6772,10 +6814,8 @@ struct cgroup *cgroup_get_from_path(const char *path)
struct cgroup *cgrp = ERR_PTR(-ENOENT);
struct cgroup *root_cgrp;
- spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
+ root_cgrp = current_cgns_cgroup_dfl();
kn = kernfs_walk_and_get(root_cgrp->kn, path);
- spin_unlock_irq(&css_set_lock);
if (!kn)
goto out;
@@ -6800,15 +6840,15 @@ out:
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
/**
- * cgroup_get_from_fd - get a cgroup pointer from a fd
- * @fd: fd obtained by open(cgroup2_dir)
+ * cgroup_v1v2_get_from_fd - get a cgroup pointer from a fd
+ * @fd: fd obtained by open(cgroup_dir)
*
* Find the cgroup from a fd which should be obtained
* by opening a cgroup directory. Returns a pointer to the
* cgroup on success. ERR_PTR is returned if the cgroup
* cannot be found.
*/
-struct cgroup *cgroup_get_from_fd(int fd)
+struct cgroup *cgroup_v1v2_get_from_fd(int fd)
{
struct cgroup *cgrp;
struct file *f;
@@ -6817,10 +6857,29 @@ struct cgroup *cgroup_get_from_fd(int fd)
if (!f)
return ERR_PTR(-EBADF);
- cgrp = cgroup_get_from_file(f);
+ cgrp = cgroup_v1v2_get_from_file(f);
fput(f);
return cgrp;
}
+
+/**
+ * cgroup_get_from_fd - same as cgroup_v1v2_get_from_fd, but only supports
+ * cgroup2.
+ * @fd: fd obtained by open(cgroup2_dir)
+ */
+struct cgroup *cgroup_get_from_fd(int fd)
+{
+ struct cgroup *cgrp = cgroup_v1v2_get_from_fd(fd);
+
+ if (IS_ERR(cgrp))
+ return ERR_CAST(cgrp);
+
+ if (!cgroup_on_dfl(cgrp)) {
+ cgroup_put(cgrp);
+ return ERR_PTR(-EBADF);
+ }
+ return cgrp;
+}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
static u64 power_of_ten(int power)