diff options
-rw-r--r-- | Documentation/admin-guide/device-mapper/verity.rst | 4 | ||||
-rw-r--r-- | MAINTAINERS | 8 | ||||
-rw-r--r-- | drivers/char/hw_random/bcm2835-rng.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-bufio.c | 13 | ||||
-rw-r--r-- | drivers/md/dm-cache-policy.h | 2 | ||||
-rw-r--r-- | drivers/md/dm-clone-target.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 78 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-stats.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 78 | ||||
-rw-r--r-- | drivers/md/dm-verity-target.c | 18 | ||||
-rw-r--r-- | drivers/md/dm.c | 9 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 84 | ||||
-rw-r--r-- | fs/btrfs/backref.h | 1 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-io-tree.c | 15 | ||||
-rw-r--r-- | fs/btrfs/send.c | 5 | ||||
-rw-r--r-- | fs/btrfs/send.h | 5 | ||||
-rw-r--r-- | fs/erofs/fscache.c | 3 | ||||
-rw-r--r-- | fs/erofs/zdata.c | 17 | ||||
-rw-r--r-- | fs/erofs/zdata.h | 6 | ||||
-rw-r--r-- | fs/erofs/zmap.c | 22 | ||||
-rw-r--r-- | include/linux/cgroup.h | 1 | ||||
-rw-r--r-- | kernel/bpf/cgroup_iter.c | 2 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 99 |
26 files changed, 308 insertions, 178 deletions
diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index 1a6b91368e59..a65c1602cb23 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -141,6 +141,10 @@ root_hash_sig_key_desc <key_description> also gain new certificates at run time if they are signed by a certificate already in the secondary trusted keyring. +try_verify_in_tasklet + If verity hashes are in cache, verify data blocks in kernel tasklet instead + of workqueue. This option can reduce IO latency. + Theory of operation =================== diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..ca063a504026 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4459,13 +4459,15 @@ M: Josef Bacik <josef@toxicpanda.com> M: David Sterba <dsterba@suse.com> L: linux-btrfs@vger.kernel.org S: Maintained -W: http://btrfs.wiki.kernel.org/ -Q: http://patchwork.kernel.org/project/linux-btrfs/list/ +W: https://btrfs.readthedocs.io +W: https://btrfs.wiki.kernel.org/ +Q: https://patchwork.kernel.org/project/linux-btrfs/list/ C: irc://irc.libera.chat/btrfs T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git F: Documentation/filesystems/btrfs.rst F: fs/btrfs/ F: include/linux/btrfs* +F: include/trace/events/btrfs.h F: include/uapi/linux/btrfs* BTTV VIDEO4LINUX DRIVER @@ -5266,6 +5268,7 @@ F: tools/testing/selftests/cgroup/ CONTROL GROUP - BLOCK IO CONTROLLER (BLKIO) M: Tejun Heo <tj@kernel.org> +M: Josef Bacik <josef@toxicpanda.com> M: Jens Axboe <axboe@kernel.dk> L: cgroups@vger.kernel.org L: linux-block@vger.kernel.org @@ -5273,6 +5276,7 @@ T: git git://git.kernel.dk/linux-block F: Documentation/admin-guide/cgroup-v1/blkio-controller.rst F: block/bfq-cgroup.c F: block/blk-cgroup.c +F: block/blk-iocost.c F: block/blk-iolatency.c F: block/blk-throttle.c F: include/linux/blk-cgroup.h diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index e7dd457e9b22..e98fcac578d6 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -71,7 +71,7 @@ static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, while ((rng_readl(priv, RNG_STATUS) >> 24) == 0) { if (!wait) return 0; - cpu_relax(); + hwrng_msleep(rng, 1000); } num_words = rng_readl(priv, RNG_STATUS) >> 24; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 09c7ed2650ca..9c5ef818ca36 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -795,7 +795,8 @@ static void __make_buffer_clean(struct dm_buffer *b) { BUG_ON(b->hold_count); - if (!b->state) /* fast case */ + /* smp_load_acquire() pairs with read_endio()'s smp_mb__before_atomic() */ + if (!smp_load_acquire(&b->state)) /* fast case */ return; wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); @@ -816,7 +817,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c) BUG_ON(test_bit(B_DIRTY, &b->state)); if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep && - unlikely(test_bit(B_READING, &b->state))) + unlikely(test_bit_acquire(B_READING, &b->state))) continue; if (!b->hold_count) { @@ -1058,7 +1059,7 @@ found_buffer: * If the user called both dm_bufio_prefetch and dm_bufio_get on * the same buffer, it would deadlock if we waited. */ - if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state))) + if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state))) return NULL; b->hold_count++; @@ -1218,7 +1219,7 @@ void dm_bufio_release(struct dm_buffer *b) * invalid buffer. */ if ((b->read_error || b->write_error) && - !test_bit(B_READING, &b->state) && + !test_bit_acquire(B_READING, &b->state) && !test_bit(B_WRITING, &b->state) && !test_bit(B_DIRTY, &b->state)) { __unlink_buffer(b); @@ -1479,7 +1480,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_release_move); static void forget_buffer_locked(struct dm_buffer *b) { - if (likely(!b->hold_count) && likely(!b->state)) { + if (likely(!b->hold_count) && likely(!smp_load_acquire(&b->state))) { __unlink_buffer(b); __free_buffer_wake(b); } @@ -1639,7 +1640,7 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) { if (!(gfp & __GFP_FS) || (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) { - if (test_bit(B_READING, &b->state) || + if (test_bit_acquire(B_READING, &b->state) || test_bit(B_WRITING, &b->state) || test_bit(B_DIRTY, &b->state)) return false; diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index c05fc3436cef..06eb31af626f 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -166,7 +166,7 @@ struct dm_cache_policy_type { struct dm_cache_policy_type *real; /* - * Policies may store a hint for each each cache block. + * Policies may store a hint for each cache block. * Currently the size of this hint must be 0 or 4 bytes but we * expect to relax this in future. */ diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index 811b0a5379d0..2f1cc66d2641 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -2035,7 +2035,7 @@ static void disable_passdown_if_not_supported(struct clone *clone) reason = "max discard sectors smaller than a region"; if (reason) { - DMWARN("Destination device (%pd) %s: Disabling discard passdown.", + DMWARN("Destination device (%pg) %s: Disabling discard passdown.", dest_dev, reason); clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags); } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 98976aaa9db9..6b3f867d0b70 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -434,10 +434,10 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, hc = __get_name_cell(new); if (hc) { - DMWARN("Unable to change %s on mapped device %s to one that " - "already exists: %s", - change_uuid ? "uuid" : "name", - param->name, new); + DMERR("Unable to change %s on mapped device %s to one that " + "already exists: %s", + change_uuid ? "uuid" : "name", + param->name, new); dm_put(hc->md); up_write(&_hash_lock); kfree(new_data); @@ -449,8 +449,8 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, */ hc = __get_name_cell(param->name); if (!hc) { - DMWARN("Unable to rename non-existent device, %s to %s%s", - param->name, change_uuid ? "uuid " : "", new); + DMERR("Unable to rename non-existent device, %s to %s%s", + param->name, change_uuid ? "uuid " : "", new); up_write(&_hash_lock); kfree(new_data); return ERR_PTR(-ENXIO); @@ -460,9 +460,9 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, * Does this device already have a uuid? */ if (change_uuid && hc->uuid) { - DMWARN("Unable to change uuid of mapped device %s to %s " - "because uuid is already set to %s", - param->name, new, hc->uuid); + DMERR("Unable to change uuid of mapped device %s to %s " + "because uuid is already set to %s", + param->name, new, hc->uuid); dm_put(hc->md); up_write(&_hash_lock); kfree(new_data); @@ -750,7 +750,7 @@ static int get_target_version(struct file *filp, struct dm_ioctl *param, size_t static int check_name(const char *name) { if (strchr(name, '/')) { - DMWARN("invalid device name"); + DMERR("invalid device name"); return -EINVAL; } @@ -773,7 +773,7 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *src down_read(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { - DMWARN("device has been removed from the dev hash table."); + DMERR("device has been removed from the dev hash table."); goto out; } @@ -1026,7 +1026,7 @@ static int dev_rename(struct file *filp, struct dm_ioctl *param, size_t param_si if (new_data < param->data || invalid_str(new_data, (void *) param + param_size) || !*new_data || strlen(new_data) > (change_uuid ? DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) { - DMWARN("Invalid new mapped device name or uuid string supplied."); + DMERR("Invalid new mapped device name or uuid string supplied."); return -EINVAL; } @@ -1061,7 +1061,7 @@ static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t pa if (geostr < param->data || invalid_str(geostr, (void *) param + param_size)) { - DMWARN("Invalid geometry supplied."); + DMERR("Invalid geometry supplied."); goto out; } @@ -1069,13 +1069,13 @@ static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t pa indata + 1, indata + 2, indata + 3, &dummy); if (x != 4) { - DMWARN("Unable to interpret geometry settings."); + DMERR("Unable to interpret geometry settings."); goto out; } if (indata[0] > 65535 || indata[1] > 255 || indata[2] > 255 || indata[3] > ULONG_MAX) { - DMWARN("Geometry exceeds range limits."); + DMERR("Geometry exceeds range limits."); goto out; } @@ -1387,7 +1387,7 @@ static int populate_table(struct dm_table *table, char *target_params; if (!param->target_count) { - DMWARN("populate_table: no targets specified"); + DMERR("populate_table: no targets specified"); return -EINVAL; } @@ -1395,7 +1395,7 @@ static int populate_table(struct dm_table *table, r = next_target(spec, next, end, &spec, &target_params); if (r) { - DMWARN("unable to find target"); + DMERR("unable to find target"); return r; } @@ -1404,7 +1404,7 @@ static int populate_table(struct dm_table *table, (sector_t) spec->length, target_params); if (r) { - DMWARN("error adding target to table"); + DMERR("error adding target to table"); return r; } @@ -1451,8 +1451,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si if (immutable_target_type && (immutable_target_type != dm_table_get_immutable_target_type(t)) && !dm_table_get_wildcard_target(t)) { - DMWARN("can't replace immutable target type %s", - immutable_target_type->name); + DMERR("can't replace immutable target type %s", + immutable_target_type->name); r = -EINVAL; goto err_unlock_md_type; } @@ -1461,12 +1461,12 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si /* setup md->queue to reflect md's type (may block) */ r = dm_setup_md_queue(md, t); if (r) { - DMWARN("unable to set up device queue for new table."); + DMERR("unable to set up device queue for new table."); goto err_unlock_md_type; } } else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) { - DMWARN("can't change device type (old=%u vs new=%u) after initial table load.", - dm_get_md_type(md), dm_table_get_type(t)); + DMERR("can't change device type (old=%u vs new=%u) after initial table load.", + dm_get_md_type(md), dm_table_get_type(t)); r = -EINVAL; goto err_unlock_md_type; } @@ -1477,7 +1477,7 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si down_write(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { - DMWARN("device has been removed from the dev hash table."); + DMERR("device has been removed from the dev hash table."); up_write(&_hash_lock); r = -ENXIO; goto err_destroy_table; @@ -1686,19 +1686,19 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para if (tmsg < (struct dm_target_msg *) param->data || invalid_str(tmsg->message, (void *) param + param_size)) { - DMWARN("Invalid target message parameters."); + DMERR("Invalid target message parameters."); r = -EINVAL; goto out; } r = dm_split_args(&argc, &argv, tmsg->message); if (r) { - DMWARN("Failed to split target message parameters"); + DMERR("Failed to split target message parameters"); goto out; } if (!argc) { - DMWARN("Empty message received."); + DMERR("Empty message received."); r = -EINVAL; goto out_argv; } @@ -1718,12 +1718,12 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para ti = dm_table_find_target(table, tmsg->sector); if (!ti) { - DMWARN("Target message sector outside device."); + DMERR("Target message sector outside device."); r = -EINVAL; } else if (ti->type->message) r = ti->type->message(ti, argc, argv, result, maxlen); else { - DMWARN("Target type does not support messages"); + DMERR("Target type does not support messages"); r = -EINVAL; } @@ -1814,11 +1814,11 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) if ((DM_VERSION_MAJOR != version[0]) || (DM_VERSION_MINOR < version[1])) { - DMWARN("ioctl interface mismatch: " - "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", - DM_VERSION_MAJOR, DM_VERSION_MINOR, - DM_VERSION_PATCHLEVEL, - version[0], version[1], version[2], cmd); + DMERR("ioctl interface mismatch: " + "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", + DM_VERSION_MAJOR, DM_VERSION_MINOR, + DM_VERSION_PATCHLEVEL, + version[0], version[1], version[2], cmd); r = -EINVAL; } @@ -1927,11 +1927,11 @@ static int validate_params(uint cmd, struct dm_ioctl *param) if (cmd == DM_DEV_CREATE_CMD) { if (!*param->name) { - DMWARN("name not supplied when creating device"); + DMERR("name not supplied when creating device"); return -EINVAL; } } else if (*param->uuid && *param->name) { - DMWARN("only supply one of name or uuid, cmd(%u)", cmd); + DMERR("only supply one of name or uuid, cmd(%u)", cmd); return -EINVAL; } @@ -1978,7 +1978,7 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us fn = lookup_ioctl(cmd, &ioctl_flags); if (!fn) { - DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); + DMERR("dm_ctl_ioctl: unknown command 0x%x", command); return -ENOTTY; } @@ -2203,7 +2203,7 @@ int __init dm_early_create(struct dm_ioctl *dmi, (sector_t) spec_array[i]->length, target_params_array[i]); if (r) { - DMWARN("error adding target to table"); + DMERR("error adding target to table"); goto err_destroy_table; } } @@ -2216,7 +2216,7 @@ int __init dm_early_create(struct dm_ioctl *dmi, /* setup md->queue to reflect md's type (may block) */ r = dm_setup_md_queue(md, t); if (r) { - DMWARN("unable to set up device queue for new table."); + DMERR("unable to set up device queue for new table."); goto err_destroy_table; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c640be453313..54263679a7b1 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2529,7 +2529,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) * of the "sync" directive. * * With reshaping capability added, we must ensure that - * that the "sync" directive is disallowed during the reshape. + * the "sync" directive is disallowed during the reshape. */ if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) continue; @@ -2590,7 +2590,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) /* * Adjust data_offset and new_data_offset on all disk members of @rs - * for out of place reshaping if requested by contructor + * for out of place reshaping if requested by constructor * * We need free space at the beginning of each raid disk for forward * and at the end for backward reshapes which userspace has to provide diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 3001b10a3fbf..a41209a43506 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -238,7 +238,7 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped) dm_requeue_original_request(tio, true); break; default: - DMWARN("unimplemented target endio return value: %d", r); + DMCRIT("unimplemented target endio return value: %d", r); BUG(); } } @@ -409,7 +409,7 @@ static int map_request(struct dm_rq_target_io *tio) dm_kill_unmapped_request(rq, BLK_STS_IOERR); break; default: - DMWARN("unimplemented target map return value: %d", r); + DMCRIT("unimplemented target map return value: %d", r); BUG(); } diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 8326f9fe0e91..f105a71915ab 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -1220,7 +1220,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv, return 2; /* this wasn't a stats message */ if (r == -EINVAL) - DMWARN("Invalid parameters for message %s", argv[0]); + DMCRIT("Invalid parameters for message %s", argv[0]); return r; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d8034ff0cb24..078da18bb86d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -234,12 +234,12 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, return 0; if ((start >= dev_size) || (start + len > dev_size)) { - DMWARN("%s: %pg too small for target: " - "start=%llu, len=%llu, dev_size=%llu", - dm_device_name(ti->table->md), bdev, - (unsigned long long)start, - (unsigned long long)len, - (unsigned long long)dev_size); + DMERR("%s: %pg too small for target: " + "start=%llu, len=%llu, dev_size=%llu", + dm_device_name(ti->table->md), bdev, + (unsigned long long)start, + (unsigned long long)len, + (unsigned long long)dev_size); return 1; } @@ -251,10 +251,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, unsigned int zone_sectors = bdev_zone_sectors(bdev); if (start & (zone_sectors - 1)) { - DMWARN("%s: start=%llu not aligned to h/w zone size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)start, - zone_sectors, bdev); + DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)start, + zone_sectors, bdev); return 1; } @@ -268,10 +268,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, * the sector range. */ if (len & (zone_sectors - 1)) { - DMWARN("%s: len=%llu not aligned to h/w zone size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)len, - zone_sectors, bdev); + DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)len, + zone_sectors, bdev); return 1; } } @@ -280,20 +280,20 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, return 0; if (start & (logical_block_size_sectors - 1)) { - DMWARN("%s: start=%llu not aligned to h/w " - "logical block size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)start, - limits->logical_block_size, bdev); + DMERR("%s: start=%llu not aligned to h/w " + "logical block size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)start, + limits->logical_block_size, bdev); return 1; } if (len & (logical_block_size_sectors - 1)) { - DMWARN("%s: len=%llu not aligned to h/w " - "logical block size %u of %pg", - dm_device_name(ti->table->md), - (unsigned long long)len, - limits->logical_block_size, bdev); + DMERR("%s: len=%llu not aligned to h/w " + "logical block size %u of %pg", + dm_device_name(ti->table->md), + (unsigned long long)len, + limits->logical_block_size, bdev); return 1; } @@ -434,8 +434,8 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) } } if (!found) { - DMWARN("%s: device %s not in table devices list", - dm_device_name(ti->table->md), d->name); + DMERR("%s: device %s not in table devices list", + dm_device_name(ti->table->md), d->name); return; } if (refcount_dec_and_test(&dd->count)) { @@ -618,12 +618,12 @@ static int validate_hardware_logical_block_alignment(struct dm_table *t, } if (remaining) { - DMWARN("%s: table line %u (start sect %llu len %llu) " - "not aligned to h/w logical block size %u", - dm_device_name(t->md), i, - (unsigned long long) ti->begin, - (unsigned long long) ti->len, - limits->logical_block_size); + DMERR("%s: table line %u (start sect %llu len %llu) " + "not aligned to h/w logical block size %u", + dm_device_name(t->md), i, + (unsigned long long) ti->begin, + (unsigned long long) ti->len, + limits->logical_block_size); return -EINVAL; } @@ -1008,7 +1008,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * struct dm_md_mempools *pools; if (unlikely(type == DM_TYPE_NONE)) { - DMWARN("no table type is set, can't allocate mempools"); + DMERR("no table type is set, can't allocate mempools"); return -EINVAL; } @@ -1112,7 +1112,7 @@ static bool integrity_profile_exists(struct gendisk *disk) * Get a disk whose integrity profile reflects the table's profile. * Returns NULL if integrity support was inconsistent or unavailable. */ -static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t) +static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t) { struct list_head *devices = dm_table_get_devices(t); struct dm_dev_internal *dd = NULL; @@ -1185,10 +1185,10 @@ static int dm_table_register_integrity(struct dm_table *t) * profile the new profile should not conflict. */ if (blk_integrity_compare(dm_disk(md), template_disk) < 0) { - DMWARN("%s: conflict with existing integrity profile: " - "%s profile mismatch", - dm_device_name(t->md), - template_disk->disk_name); + DMERR("%s: conflict with existing integrity profile: " + "%s profile mismatch", + dm_device_name(t->md), + template_disk->disk_name); return 1; } @@ -1327,7 +1327,7 @@ static int dm_table_construct_crypto_profile(struct dm_table *t) if (t->md->queue && !blk_crypto_has_capabilities(profile, t->md->queue->crypto_profile)) { - DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!"); + DMERR("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!"); dm_destroy_crypto_profile(profile); return -EINVAL; } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 8a00cc42e498..ccf5b852fbf7 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1401,14 +1401,16 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) /* WQ_UNBOUND greatly improves performance when running on ramdisk */ wq_flags = WQ_MEM_RECLAIM | WQ_UNBOUND; - if (v->use_tasklet) { - /* - * Allow verify_wq to preempt softirq since verification in - * tasklet will fall-back to using it for error handling - * (or if the bufio cache doesn't have required hashes). - */ - wq_flags |= WQ_HIGHPRI; - } + /* + * Using WQ_HIGHPRI improves throughput and completion latency by + * reducing wait times when reading from a dm-verity device. + * + * Also as required for the "try_verify_in_tasklet" feature: WQ_HIGHPRI + * allows verify_wq to preempt softirq since verification in tasklet + * will fall-back to using it for error handling (or if the bufio cache + * doesn't have required hashes). + */ + wq_flags |= WQ_HIGHPRI; v->verify_wq = alloc_workqueue("kverityd", wq_flags, num_online_cpus()); if (!v->verify_wq) { ti->error = "Cannot allocate workqueue"; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 60549b65c799..95a1ee3d314e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -864,7 +864,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; if (geo->start > sz) { - DMWARN("Start sector is beyond the geometry limits."); + DMERR("Start sector is beyond the geometry limits."); return -EINVAL; } @@ -1149,7 +1149,7 @@ static void clone_endio(struct bio *bio) /* The target will handle the io */ return; default: - DMWARN("unimplemented target endio return value: %d", r); + DMCRIT("unimplemented target endio return value: %d", r); BUG(); } } @@ -1455,7 +1455,7 @@ static void __map_bio(struct bio *clone) dm_io_dec_pending(io, BLK_STS_DM_REQUEUE); break; default: - DMWARN("unimplemented target map return value: %d", r); + DMCRIT("unimplemented target map return value: %d", r); BUG(); } } @@ -2005,7 +2005,7 @@ static struct mapped_device *alloc_dev(int minor) md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id); if (!md) { - DMWARN("unable to allocate device, out of memory."); + DMERR("unable to allocate device, out of memory."); return NULL; } @@ -2065,7 +2065,6 @@ static struct mapped_device *alloc_dev(int minor) md->disk->minors = 1; md->disk->flags |= GENHD_FL_NO_PART; md->disk->fops = &dm_blk_dops; - md->disk->queue = md->queue; md->disk->private_data = md; sprintf(md->disk->disk_name, "dm-%d", minor); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index dce3a16996b9..4ec18ceb2f21 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -138,6 +138,7 @@ struct share_check { u64 root_objectid; u64 inum; int share_count; + bool have_delayed_delete_refs; }; static inline int extent_is_shared(struct share_check *sc) @@ -820,16 +821,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; - struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct btrfs_key key; - struct btrfs_key tmp_op_key; struct rb_node *n; int count; int ret = 0; - if (extent_op && extent_op->update_key) - btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); - spin_lock(&head->lock); for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { node = rb_entry(n, struct btrfs_delayed_ref_node, @@ -855,10 +851,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ struct btrfs_delayed_tree_ref *ref; + struct btrfs_key *key_ptr = NULL; + + if (head->extent_op && head->extent_op->update_key) { + btrfs_disk_key_to_cpu(&key, &head->extent_op->key); + key_ptr = &key; + } ref = btrfs_delayed_node_to_tree_ref(node); ret = add_indirect_ref(fs_info, preftrees, ref->root, - &tmp_op_key, ref->level + 1, + key_ptr, ref->level + 1, node->bytenr, count, sc, GFP_ATOMIC); break; @@ -884,13 +886,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, key.offset = ref->offset; /* - * Found a inum that doesn't match our known inum, we - * know it's shared. + * If we have a share check context and a reference for + * another inode, we can't exit immediately. This is + * because even if this is a BTRFS_ADD_DELAYED_REF + * reference we may find next a BTRFS_DROP_DELAYED_REF + * which cancels out this ADD reference. + * + * If this is a DROP reference and there was no previous + * ADD reference, then we need to signal that when we + * process references from the extent tree (through + * add_inline_refs() and add_keyed_refs()), we should + * not exit early if we find a reference for another + * inode, because one of the delayed DROP references + * may cancel that reference in the extent tree. */ - if (sc && sc->inum && ref->objectid != sc->inum) { - ret = BACKREF_FOUND_SHARED; - goto out; - } + if (sc && count < 0) + sc->have_delayed_delete_refs = true; ret = add_indirect_ref(fs_info, preftrees, ref->root, &key, 0, node->bytenr, count, sc, @@ -920,7 +931,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, } if (!ret) ret = extent_is_shared(sc); -out: + spin_unlock(&head->lock); return ret; } @@ -1023,7 +1034,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1033,6 +1045,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ret = add_indirect_ref(fs_info, preftrees, root, &key, 0, bytenr, count, sc, GFP_NOFS); + break; } default: @@ -1122,7 +1135,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1522,6 +1536,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache { struct btrfs_backref_shared_cache_entry *entry; + if (!cache->use_cache) + return false; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return false; @@ -1557,6 +1574,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache return false; *is_shared = entry->is_shared; + /* + * If the node at this level is shared, than all nodes below are also + * shared. Currently some of the nodes below may be marked as not shared + * because we have just switched from one leaf to another, and switched + * also other nodes above the leaf and below the current level, so mark + * them as shared. + */ + if (*is_shared) { + for (int i = 0; i < level; i++) { + cache->entries[i].is_shared = true; + cache->entries[i].gen = entry->gen; + } + } return true; } @@ -1573,6 +1603,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, struct btrfs_backref_shared_cache_entry *entry; u64 gen; + if (!cache->use_cache) + return; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return; @@ -1648,6 +1681,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, .root_objectid = root->root_key.objectid, .inum = inum, .share_count = 0, + .have_delayed_delete_refs = false, }; int level; @@ -1669,6 +1703,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, /* -1 means we are in the bytenr of the data extent. */ level = -1; ULIST_ITER_INIT(&uiter); + cache->use_cache = true; while (1) { bool is_shared; bool cached; @@ -1698,6 +1733,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, extent_gen > btrfs_root_last_snapshot(&root->root_item)) break; + /* + * If our data extent was not directly shared (without multiple + * reference items), than it might have a single reference item + * with a count > 1 for the same offset, which means there are 2 + * (or more) file extent items that point to the data extent - + * this happens when a file extent item needs to be split and + * then one item gets moved to another leaf due to a b+tree leaf + * split when inserting some item. In this case the file extent + * items may be located in different leaves and therefore some + * of the leaves may be referenced through shared subtrees while + * others are not. Since our extent buffer cache only works for + * a single path (by far the most common case and simpler to + * deal with), we can not use it if we have multiple leaves + * (which implies multiple paths). + */ + if (level == -1 && tmp->nnodes > 1) + cache->use_cache = false; + if (level >= 0) store_backref_shared_cache(cache, root, bytenr, level, false); @@ -1713,6 +1766,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, break; } shared.share_count = 0; + shared.have_delayed_delete_refs = false; cond_resched(); } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 52ae6957b414..8e69584d538d 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -29,6 +29,7 @@ struct btrfs_backref_shared_cache { * a given data extent should never exceed the maximum b+tree height. */ struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL]; + bool use_cache; }; typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 32c415cfbdfe..deebc8ddbd93 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -774,10 +774,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: - /* REVIEW */ if (wait && caching_ctl) ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); - /* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */ if (caching_ctl) btrfs_put_caching_control(caching_ctl); diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 618275af19c4..83cb0378096f 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, int err; u64 failed_start; - while (1) { + err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, + cached_state, NULL, GFP_NOFS); + while (err == -EEXIST) { + if (failed_start != start) + clear_extent_bit(tree, start, failed_start - 1, + EXTENT_LOCKED, cached_state); + + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, cached_state, NULL, GFP_NOFS); - if (err == -EEXIST) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else - break; - WARN_ON(start > end); } return err; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ef4167072b8..ec6e1752af2c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -348,6 +348,7 @@ static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd) switch (sctx->proto) { case 1: return cmd <= BTRFS_SEND_C_MAX_V1; case 2: return cmd <= BTRFS_SEND_C_MAX_V2; + case 3: return cmd <= BTRFS_SEND_C_MAX_V3; default: return false; } } @@ -6469,7 +6470,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret < 0) goto out; } - if (sctx->cur_inode_needs_verity) { + + if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY) + && sctx->cur_inode_needs_verity) { ret = process_verity(sctx); if (ret < 0) goto out; diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 0a4537775e0c..f7585cfa7e52 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -10,7 +10,12 @@ #include <linux/types.h> #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" +/* Conditional support for the upcoming protocol version. */ +#ifdef CONFIG_BTRFS_DEBUG +#define BTRFS_SEND_STREAM_VERSION 3 +#else #define BTRFS_SEND_STREAM_VERSION 2 +#endif /* * In send stream v1, no command is larger than 64K. In send stream v2, no limit diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 998cd26a1b3b..fe05bc51f9f2 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -590,14 +590,17 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, struct super_block *psb = erofs_pseudo_mnt->mnt_sb; mutex_lock(&erofs_domain_cookies_lock); + spin_lock(&psb->s_inode_list_lock); list_for_each_entry(inode, &psb->s_inodes, i_sb_list) { ctx = inode->i_private; if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) continue; igrab(inode); + spin_unlock(&psb->s_inode_list_lock); mutex_unlock(&erofs_domain_cookies_lock); return ctx; } + spin_unlock(&psb->s_inode_list_lock); ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode); mutex_unlock(&erofs_domain_cookies_lock); return ctx; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 559380a535af..c7f24fc7efd5 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -813,15 +813,14 @@ retry: ++spiltted; if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; - - if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && - !(map->m_flags & EROFS_MAP_PARTIAL_REF) && - fe->pcl->length == map->m_llen) - fe->pcl->partial = false; if (fe->pcl->length < offset + end - map->m_la) { fe->pcl->length = offset + end - map->m_la; fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK; } + if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && + !(map->m_flags & EROFS_MAP_PARTIAL_REF) && + fe->pcl->length == map->m_llen) + fe->pcl->partial = false; next_part: /* shorten the remaining extent to update progress */ map->m_llen = offset + cur - map->m_la; @@ -888,15 +887,13 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) { unsigned int pgnr; - struct page *oldpage; pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; DBG_BUGON(pgnr >= be->nr_pages); - oldpage = be->decompressed_pages[pgnr]; - be->decompressed_pages[pgnr] = bvec->page; - - if (!oldpage) + if (!be->decompressed_pages[pgnr]) { + be->decompressed_pages[pgnr] = bvec->page; return; + } } /* (cold path) one pcluster is requested multiple times */ diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index e7f04c4fbb81..d98c95212985 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -126,10 +126,10 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) } /* - * bit 31: I/O error occurred on this page - * bit 0 - 30: remaining parts to complete this page + * bit 30: I/O error occurred on this page + * bit 0 - 29: remaining parts to complete this page */ -#define Z_EROFS_PAGE_EIO (1 << 31) +#define Z_EROFS_PAGE_EIO (1 << 30) static inline void z_erofs_onlinepage_init(struct page *page) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 44c27ef39c43..0bb66927e3d0 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -57,8 +57,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, 8); - kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), - EROFS_KMAP_ATOMIC); + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP); if (IS_ERR(kaddr)) { err = PTR_ERR(kaddr); goto out_unlock; @@ -73,7 +72,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); vi->z_tailextent_headlcn = 0; - goto unmap_done; + goto done; } vi->z_advise = le16_to_cpu(h->h_advise); vi->z_algorithmtype[0] = h->h_algorithmtype & 15; @@ -85,7 +84,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", headnr + 1, vi->z_algorithmtype[headnr], vi->nid); err = -EOPNOTSUPP; - goto unmap_done; + goto out_put_metabuf; } vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); @@ -95,7 +94,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto unmap_done; + goto out_put_metabuf; } if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ @@ -103,12 +102,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto unmap_done; + goto out_put_metabuf; } -unmap_done: - erofs_put_metabuf(&buf); - if (err) - goto out_unlock; if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) { struct erofs_map_blocks map = { @@ -127,7 +122,7 @@ unmap_done: err = -EFSCORRUPTED; } if (err < 0) - goto out_unlock; + goto out_put_metabuf; } if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && @@ -141,11 +136,14 @@ unmap_done: EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&map.buf); if (err < 0) - goto out_unlock; + goto out_put_metabuf; } +done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); +out_put_metabuf: + erofs_put_metabuf(&buf); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f2a9f2274c3b..528bd44b59e2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -106,6 +106,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup *cgroup_get_from_path(const char *path); struct cgroup *cgroup_get_from_fd(int fd); +struct cgroup *cgroup_v1v2_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index 0d200a993489..9fcf09f2ef00 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -196,7 +196,7 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog, return -EINVAL; if (fd) - cgrp = cgroup_get_from_fd(fd); + cgrp = cgroup_v1v2_get_from_fd(fd); else if (id) cgrp = cgroup_get_from_id(id); else /* walk the entire hierarchy by default. */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 7f486677ab1f..2319946715e0 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1392,6 +1392,9 @@ static void cgroup_destroy_root(struct cgroup_root *root) cgroup_free_root(root); } +/* + * Returned cgroup is without refcount but it's valid as long as cset pins it. + */ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { @@ -1403,6 +1406,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, res_cgroup = cset->dfl_cgrp; } else { struct cgrp_cset_link *link; + lockdep_assert_held(&css_set_lock); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; @@ -1414,6 +1418,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, } } + BUG_ON(!res_cgroup); return res_cgroup; } @@ -1436,23 +1441,36 @@ current_cgns_cgroup_from_root(struct cgroup_root *root) rcu_read_unlock(); - BUG_ON(!res); return res; } +/* + * Look up cgroup associated with current task's cgroup namespace on the default + * hierarchy. + * + * Unlike current_cgns_cgroup_from_root(), this doesn't need locks: + * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu + * pointers. + * - css_set_lock is not needed because we just read cset->dfl_cgrp. + * - As a bonus returned cgrp is pinned with the current because it cannot + * switch cgroup_ns asynchronously. + */ +static struct cgroup *current_cgns_cgroup_dfl(void) +{ + struct css_set *cset; + + cset = current->nsproxy->cgroup_ns->root_cset; + return __cset_cgroup_from_root(cset, &cgrp_dfl_root); +} + /* look up cgroup associated with given css_set on the specified hierarchy */ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { - struct cgroup *res = NULL; - lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); - res = __cset_cgroup_from_root(cset, root); - - BUG_ON(!res); - return res; + return __cset_cgroup_from_root(cset, root); } /* @@ -6191,9 +6209,7 @@ struct cgroup *cgroup_get_from_id(u64 id) if (!cgrp) return ERR_PTR(-ENOENT); - spin_lock_irq(&css_set_lock); - root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root); - spin_unlock_irq(&css_set_lock); + root_cgrp = current_cgns_cgroup_dfl(); if (!cgroup_is_descendant(cgrp, root_cgrp)) { cgroup_put(cgrp); return ERR_PTR(-ENOENT); @@ -6294,16 +6310,42 @@ void cgroup_fork(struct task_struct *child) INIT_LIST_HEAD(&child->cg_list); } -static struct cgroup *cgroup_get_from_file(struct file *f) +/** + * cgroup_v1v2_get_from_file - get a cgroup pointer from a file pointer + * @f: file corresponding to cgroup_dir + * + * Find the cgroup from a file pointer associated with a cgroup directory. + * Returns a pointer to the cgroup on success. ERR_PTR is returned if the + * cgroup cannot be found. + */ +static struct cgroup *cgroup_v1v2_get_from_file(struct file *f) { struct cgroup_subsys_state *css; - struct cgroup *cgrp; css = css_tryget_online_from_dir(f->f_path.dentry, NULL); if (IS_ERR(css)) return ERR_CAST(css); - cgrp = css->cgroup; + return css->cgroup; +} + +/** + * cgroup_get_from_file - same as cgroup_v1v2_get_from_file, but only supports + * cgroup2. + * @f: file corresponding to cgroup2_dir + */ +static struct cgroup *cgroup_get_from_file(struct file *f) +{ + struct cgroup *cgrp = cgroup_v1v2_get_from_file(f); + + if (IS_ERR(cgrp)) + return ERR_CAST(cgrp); + + if (!cgroup_on_dfl(cgrp)) { + cgroup_put(cgrp); + return ERR_PTR(-EBADF); + } + return cgrp; } @@ -6772,10 +6814,8 @@ struct cgroup *cgroup_get_from_path(const char *path) struct cgroup *cgrp = ERR_PTR(-ENOENT); struct cgroup *root_cgrp; - spin_lock_irq(&css_set_lock); - root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root); + root_cgrp = current_cgns_cgroup_dfl(); kn = kernfs_walk_and_get(root_cgrp->kn, path); - spin_unlock_irq(&css_set_lock); if (!kn) goto out; @@ -6800,15 +6840,15 @@ out: EXPORT_SYMBOL_GPL(cgroup_get_from_path); /** - * cgroup_get_from_fd - get a cgroup pointer from a fd - * @fd: fd obtained by open(cgroup2_dir) + * cgroup_v1v2_get_from_fd - get a cgroup pointer from a fd + * @fd: fd obtained by open(cgroup_dir) * * Find the cgroup from a fd which should be obtained * by opening a cgroup directory. Returns a pointer to the * cgroup on success. ERR_PTR is returned if the cgroup * cannot be found. */ -struct cgroup *cgroup_get_from_fd(int fd) +struct cgroup *cgroup_v1v2_get_from_fd(int fd) { struct cgroup *cgrp; struct file *f; @@ -6817,10 +6857,29 @@ struct cgroup *cgroup_get_from_fd(int fd) if (!f) return ERR_PTR(-EBADF); - cgrp = cgroup_get_from_file(f); + cgrp = cgroup_v1v2_get_from_file(f); fput(f); return cgrp; } + +/** + * cgroup_get_from_fd - same as cgroup_v1v2_get_from_fd, but only supports + * cgroup2. + * @fd: fd obtained by open(cgroup2_dir) + */ +struct cgroup *cgroup_get_from_fd(int fd) +{ + struct cgroup *cgrp = cgroup_v1v2_get_from_fd(fd); + + if (IS_ERR(cgrp)) + return ERR_CAST(cgrp); + + if (!cgroup_on_dfl(cgrp)) { + cgroup_put(cgrp); + return ERR_PTR(-EBADF); + } + return cgrp; +} EXPORT_SYMBOL_GPL(cgroup_get_from_fd); static u64 power_of_ten(int power) |