diff options
Diffstat (limited to 'fs')
71 files changed, 575 insertions, 453 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index d8b90f95b157..726592868e9c 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -287,7 +287,7 @@ static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) if (btrfs_op(bio) == BTRFS_MAP_WRITE) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); - if (!(bio->bi_opf & REQ_RAHEAD)) + else if (!(bio->bi_opf & REQ_RAHEAD)) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); if (bio->bi_opf & REQ_PREFLUSH) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5b10401d803b..5fc670c27f86 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -558,14 +558,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ctl, struct btrfs_block_group *block_group, int index, int max_index, - struct btrfs_key *key) + struct btrfs_key *found_key) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_root *extent_root; - int ret = 0; u64 search_offset; u64 search_end = block_group->start + block_group->length; struct btrfs_path *path; + struct btrfs_key search_key; + int ret = 0; ASSERT(index >= 0); ASSERT(index <= max_index); @@ -585,37 +586,24 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ path->reada = READA_FORWARD; search_offset = index * div_u64(block_group->length, max_index); - key->objectid = block_group->start + search_offset; - key->type = BTRFS_EXTENT_ITEM_KEY; - key->offset = 0; + search_key.objectid = block_group->start + search_offset; + search_key.type = BTRFS_EXTENT_ITEM_KEY; + search_key.offset = 0; - while (1) { - ret = btrfs_search_forward(extent_root, key, path, 0); - if (ret != 0) - goto out; + btrfs_for_each_slot(extent_root, &search_key, found_key, path, ret) { /* Success; sampled an extent item in the block group */ - if (key->type == BTRFS_EXTENT_ITEM_KEY && - key->objectid >= block_group->start && - key->objectid + key->offset <= search_end) - goto out; + if (found_key->type == BTRFS_EXTENT_ITEM_KEY && + found_key->objectid >= block_group->start && + found_key->objectid + found_key->offset <= search_end) + break; /* We can't possibly find a valid extent item anymore */ - if (key->objectid >= search_end) { + if (found_key->objectid >= search_end) { ret = 1; break; } - if (key->type < BTRFS_EXTENT_ITEM_KEY) - key->type = BTRFS_EXTENT_ITEM_KEY; - else - key->objectid++; - btrfs_release_path(path); - up_read(&fs_info->commit_root_sem); - mutex_unlock(&caching_ctl->mutex); - cond_resched(); - mutex_lock(&caching_ctl->mutex); - down_read(&fs_info->commit_root_sem); } -out: + lockdep_assert_held(&caching_ctl->mutex); lockdep_assert_held_read(&fs_info->commit_root_sem); btrfs_free_path(path); @@ -659,6 +647,7 @@ out: static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl, struct btrfs_block_group *block_group) { + struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_key key; int i; u64 min_size = block_group->length; @@ -668,6 +657,8 @@ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl if (!btrfs_block_group_should_use_size_class(block_group)) return 0; + lockdep_assert_held(&caching_ctl->mutex); + lockdep_assert_held_read(&fs_info->commit_root_sem); for (i = 0; i < 5; ++i) { ret = sample_block_group_extent_item(caching_ctl, block_group, i, 5, &key); if (ret < 0) @@ -682,7 +673,6 @@ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl block_group->size_class = size_class; spin_unlock(&block_group->lock); } - out: return ret; } @@ -1185,14 +1175,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, < block_group->zone_unusable); WARN_ON(block_group->space_info->disk_total < block_group->length * factor); - WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, - &block_group->runtime_flags) && - block_group->space_info->active_total_bytes - < block_group->length); } block_group->space_info->total_bytes -= block_group->length; - if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) - block_group->space_info->active_total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); block_group->space_info->bytes_zone_unusable -= @@ -1836,7 +1820,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used %llu%% unusable", - bg->start, div_u64(bg->used * 100, bg->length), + bg->start, + div64_u64(bg->used * 100, bg->length), div64_u64(zone_unusable * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); ret = btrfs_relocate_chunk(fs_info, bg->start); @@ -2493,18 +2478,29 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans, struct btrfs_block_group_item bgi; struct btrfs_root *root = btrfs_block_group_root(fs_info); struct btrfs_key key; + u64 old_commit_used; + int ret; spin_lock(&block_group->lock); btrfs_set_stack_block_group_used(&bgi, block_group->used); btrfs_set_stack_block_group_chunk_objectid(&bgi, block_group->global_root_id); btrfs_set_stack_block_group_flags(&bgi, block_group->flags); + old_commit_used = block_group->commit_used; + block_group->commit_used = block_group->used; key.objectid = block_group->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = block_group->length; spin_unlock(&block_group->lock); - return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi)); + ret = btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi)); + if (ret < 0) { + spin_lock(&block_group->lock); + block_group->commit_used = old_commit_used; + spin_unlock(&block_group->lock); + } + + return ret; } static int insert_dev_extent(struct btrfs_trans_handle *trans, @@ -3474,6 +3470,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&info->delalloc_root_lock); while (total) { + struct btrfs_space_info *space_info; bool reclaim = false; cache = btrfs_lookup_block_group(info, bytenr); @@ -3481,6 +3478,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, ret = -ENOENT; break; } + space_info = cache->space_info; factor = btrfs_bg_type_to_factor(cache->flags); /* @@ -3495,7 +3493,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, byte_in_group = bytenr - cache->start; WARN_ON(byte_in_group > cache->length); - spin_lock(&cache->space_info->lock); + spin_lock(&space_info->lock); spin_lock(&cache->lock); if (btrfs_test_opt(info, SPACE_CACHE) && @@ -3508,24 +3506,24 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, old_val += num_bytes; cache->used = old_val; cache->reserved -= num_bytes; - cache->space_info->bytes_reserved -= num_bytes; - cache->space_info->bytes_used += num_bytes; - cache->space_info->disk_used += num_bytes * factor; + space_info->bytes_reserved -= num_bytes; + space_info->bytes_used += num_bytes; + space_info->disk_used += num_bytes * factor; spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_unlock(&space_info->lock); } else { old_val -= num_bytes; cache->used = old_val; cache->pinned += num_bytes; - btrfs_space_info_update_bytes_pinned(info, - cache->space_info, num_bytes); - cache->space_info->bytes_used -= num_bytes; - cache->space_info->disk_used -= num_bytes * factor; + btrfs_space_info_update_bytes_pinned(info, space_info, + num_bytes); + space_info->bytes_used -= num_bytes; + space_info->disk_used -= num_bytes * factor; reclaim = should_reclaim_block_group(cache, num_bytes); spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_unlock(&space_info->lock); set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0095c6e4c3d1..6b457b010cbc 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1048,7 +1048,7 @@ again: * so there is only one iref. The case that several irefs are * in the same item doesn't exist. */ - btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, path); out: btrfs_release_delayed_iref(node); btrfs_release_path(path); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index be94030e1dfb..138afa955370 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -763,7 +763,13 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, goto next; } + flags = em->flags; clear_bit(EXTENT_FLAG_PINNED, &em->flags); + /* + * In case we split the extent map, we want to preserve the + * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want + * it on the new extent maps. + */ clear_bit(EXTENT_FLAG_LOGGING, &flags); modified = !list_empty(&em->list); @@ -774,7 +780,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, if (em->start >= start && em_end <= end) goto remove_em; - flags = em->flags; gen = em->generation; compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0d250d052487..d84cef89cdff 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2693,8 +2693,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); spin_lock(&ctl->tree_lock); + /* Count initial region as zone_unusable until it gets activated. */ if (!used) to_free = size; + else if (initial && + test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) && + (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))) + to_free = 0; else if (initial) to_free = block_group->zone_capacity; else if (offset >= block_group->alloc_offset) @@ -2722,7 +2727,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, reclaimable_unusable = block_group->zone_unusable - (block_group->length - block_group->zone_capacity); /* All the region is now unusable. Mark it as unused and reclaim */ - if (block_group->zone_unusable == block_group->length) { + if (block_group->zone_unusable == block_group->length && + block_group->alloc_offset) { btrfs_mark_bg_unused(block_group); } else if (bg_reclaim_threshold && reclaimable_unusable >= diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 4c477eae6891..24cd49229408 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -120,11 +120,8 @@ enum { /* Indicate that we want to commit the transaction. */ BTRFS_FS_NEED_TRANS_COMMIT, - /* - * Indicate metadata over-commit is disabled. This is set when active - * zone tracking is needed. - */ - BTRFS_FS_NO_OVERCOMMIT, + /* This is set when active zone tracking is needed. */ + BTRFS_FS_ACTIVE_ZONE_TRACKING, /* * Indicate if we have some features changed, this is mostly for diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6c18dc9a1831..957e4d76a7b6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5421,8 +5421,13 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry, return -ENOMEM; ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname); - if (ret) + if (ret < 0) goto out; + /* + * fscrypt_setup_filename() should never return a positive value, but + * gcc on sparc/parisc thinks it can, so assert that doesn't happen. + */ + ASSERT(ret == 0); /* This needs to handle no-key deletions later on */ diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 84626c8ad5bf..a0ef1a1784c7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2859,6 +2859,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, di_args->bytes_used = btrfs_device_get_bytes_used(dev); di_args->total_bytes = btrfs_device_get_total_bytes(dev); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); + memcpy(di_args->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); if (dev->name) strscpy(di_args->path, btrfs_dev_name(dev), sizeof(di_args->path)); else diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 69c09508afb5..3eecce86f63f 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -308,8 +308,6 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, ASSERT(found); spin_lock(&found->lock); found->total_bytes += block_group->length; - if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) - found->active_total_bytes += block_group->length; found->disk_total += block_group->length * factor; found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; @@ -379,22 +377,6 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, return avail; } -static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info) -{ - /* - * On regular filesystem, all total_bytes are always writable. On zoned - * filesystem, there may be a limitation imposed by max_active_zones. - * For metadata allocation, we cannot finish an existing active block - * group to avoid a deadlock. Thus, we need to consider only the active - * groups to be writable for metadata space. - */ - if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) - return space_info->total_bytes; - - return space_info->active_total_bytes; -} - int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 bytes, enum btrfs_reserve_flush_enum flush) @@ -407,13 +389,13 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && + if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) avail = 0; else avail = calc_available_free_space(fs_info, space_info, flush); - if (used + bytes < writable_total_bytes(fs_info, space_info) + avail) + if (used + bytes < space_info->total_bytes + avail) return 1; return 0; } @@ -449,7 +431,7 @@ again: ticket = list_first_entry(head, struct reserve_ticket, list); /* Check and see if our ticket can be satisfied now. */ - if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) || + if ((used + ticket->bytes <= space_info->total_bytes) || btrfs_can_overcommit(fs_info, space_info, ticket->bytes, flush)) { btrfs_space_info_update_bytes_may_use(fs_info, @@ -829,7 +811,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, { u64 used; u64 avail; - u64 total; u64 to_reclaim = space_info->reclaim_size; lockdep_assert_held(&space_info->lock); @@ -844,9 +825,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, * space. If that's the case add in our overage so we make sure to put * appropriate pressure on the flushing state machine. */ - total = writable_total_bytes(fs_info, space_info); - if (total + avail < used) - to_reclaim += used - (total + avail); + if (space_info->total_bytes + avail < used) + to_reclaim += used - (space_info->total_bytes + avail); return to_reclaim; } @@ -856,11 +836,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, { u64 global_rsv_size = fs_info->global_block_rsv.reserved; u64 ordered, delalloc; - u64 total = writable_total_bytes(fs_info, space_info); u64 thresh; u64 used; - thresh = mult_perc(total, 90); + thresh = mult_perc(space_info->total_bytes, 90); lockdep_assert_held(&space_info->lock); @@ -923,8 +902,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, BTRFS_RESERVE_FLUSH_ALL); used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_readonly + global_rsv_size; - if (used < total) - thresh += total - used; + if (used < space_info->total_bytes) + thresh += space_info->total_bytes - used; thresh >>= space_info->clamp; used = space_info->bytes_pinned; @@ -1651,7 +1630,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * can_overcommit() to ensure we can overcommit to continue. */ if (!pending_tickets && - ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) || + ((used + orig_bytes <= space_info->total_bytes) || btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); @@ -1665,8 +1644,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, */ if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) { used = btrfs_space_info_used(space_info, false); - if (used + orig_bytes <= - writable_total_bytes(fs_info, space_info)) { + if (used + orig_bytes <= space_info->total_bytes) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); ret = 0; diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index fc99ea2b0c34..2033b71b18ce 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -96,8 +96,6 @@ struct btrfs_space_info { u64 bytes_may_use; /* number of bytes that may be used for delalloc/allocations */ u64 bytes_readonly; /* total bytes that are read only */ - /* Total bytes in the space, but only accounts active block groups. */ - u64 active_total_bytes; u64 bytes_zone_unusable; /* total bytes that are unusable until resetting the device zone */ diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 8c5efa5813b3..37fc58a7f27e 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -9,6 +9,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/bug.h> +#include <linux/list.h> #include <crypto/hash.h> #include "messages.h" #include "ctree.h" @@ -778,6 +779,45 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj, return len; } +static ssize_t btrfs_size_classes_show(struct kobject *kobj, + struct kobj_attribute *a, char *buf) +{ + struct btrfs_space_info *sinfo = to_space_info(kobj); + struct btrfs_block_group *bg; + u32 none = 0; + u32 small = 0; + u32 medium = 0; + u32 large = 0; + + for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) { + down_read(&sinfo->groups_sem); + list_for_each_entry(bg, &sinfo->block_groups[i], list) { + if (!btrfs_block_group_should_use_size_class(bg)) + continue; + switch (bg->size_class) { + case BTRFS_BG_SZ_NONE: + none++; + break; + case BTRFS_BG_SZ_SMALL: + small++; + break; + case BTRFS_BG_SZ_MEDIUM: + medium++; + break; + case BTRFS_BG_SZ_LARGE: + large++; + break; + } + } + up_read(&sinfo->groups_sem); + } + return sysfs_emit(buf, "none %u\n" + "small %u\n" + "medium %u\n" + "large %u\n", + none, small, medium, large); +} + #ifdef CONFIG_BTRFS_DEBUG /* * Request chunk allocation with current chunk size. @@ -835,6 +875,7 @@ SPACE_INFO_ATTR(bytes_zone_unusable); SPACE_INFO_ATTR(disk_used); SPACE_INFO_ATTR(disk_total); BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show, btrfs_chunk_size_store); +BTRFS_ATTR(space_info, size_classes, btrfs_size_classes_show); static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj, struct kobj_attribute *a, @@ -887,6 +928,7 @@ static struct attribute *space_info_attrs[] = { BTRFS_ATTR_PTR(space_info, disk_total), BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold), BTRFS_ATTR_PTR(space_info, chunk_size), + BTRFS_ATTR_PTR(space_info, size_classes), #ifdef CONFIG_BTRFS_DEBUG BTRFS_ATTR_PTR(space_info, force_chunk_alloc), #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7823168c08a6..6d0124b6e79e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6363,7 +6363,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ASSERT(op != BTRFS_MAP_DISCARD); em = btrfs_get_chunk_map(fs_info, logical, *length); - ASSERT(!IS_ERR(em)); + if (IS_ERR(em)) + return PTR_ERR(em); map = em->map_lookup; data_stripes = nr_data_stripes(map); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index f95b2c94d619..45d04092f2f8 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -524,8 +524,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } atomic_set(&zone_info->active_zones_left, max_active_zones - nactive); - /* Overcommit does not work well with active zone tacking. */ - set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); + set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); } /* Validate superblock log */ @@ -1581,9 +1580,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) return; WARN_ON(cache->bytes_super != 0); - unusable = (cache->alloc_offset - cache->used) + - (cache->length - cache->zone_capacity); - free = cache->zone_capacity - cache->alloc_offset; + + /* Check for block groups never get activated */ + if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) && + cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) && + !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) && + cache->alloc_offset == 0) { + unusable = cache->length; + free = 0; + } else { + unusable = (cache->alloc_offset - cache->used) + + (cache->length - cache->zone_capacity); + free = cache->zone_capacity - cache->alloc_offset; + } /* We only need ->free_space in ALLOC_SEQ block groups */ cache->cached = BTRFS_CACHE_FINISHED; @@ -1902,7 +1911,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* Successfully activated all the zones */ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); - space_info->active_total_bytes += block_group->length; + WARN_ON(block_group->alloc_offset != 0); + if (block_group->zone_unusable == block_group->length) { + block_group->zone_unusable = block_group->length - block_group->zone_capacity; + space_info->bytes_zone_unusable -= block_group->zone_capacity; + } spin_unlock(&block_group->lock); btrfs_try_granting_tickets(fs_info, space_info); spin_unlock(&space_info->lock); @@ -2086,11 +2099,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) if (!device->bdev) continue; - if (!zinfo->max_active_zones || - atomic_read(&zinfo->active_zones_left)) { + if (!zinfo->max_active_zones) { ret = true; break; } + + switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* single */ + ret = (atomic_read(&zinfo->active_zones_left) >= 1); + break; + case BTRFS_BLOCK_GROUP_DUP: + ret = (atomic_read(&zinfo->active_zones_left) >= 2); + break; + } + if (ret) + break; } mutex_unlock(&fs_info->chunk_mutex); @@ -2256,7 +2279,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) u64 avail; spin_lock(&block_group->lock); - if (block_group->reserved || + if (block_group->reserved || block_group->alloc_offset == 0 || (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) { spin_unlock(&block_group->lock); continue; @@ -2293,10 +2316,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) return 0; - /* No more block groups to activate */ - if (space_info->active_total_bytes == space_info->total_bytes) - return 0; - for (;;) { int ret; bool need_finish = false; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 1911f7016fa1..19a70a69c760 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -420,6 +420,11 @@ skip_rdma: from_kuid(&init_user_ns, ses->linux_uid), from_kuid(&init_user_ns, ses->cred_uid)); + if (ses->dfs_root_ses) { + seq_printf(m, "\n\tDFS root session id: 0x%llx", + ses->dfs_root_ses->Suid); + } + spin_lock(&ses->chan_lock); if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0)) seq_puts(m, "\tPrimary channel: DISCONNECTED "); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 2b1a8d55b4ec..cb40074feb3e 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -179,6 +179,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct path *path) tmp.source = full_path; tmp.leaf_fullpath = NULL; tmp.UNC = tmp.prepath = NULL; + tmp.dfs_root_ses = NULL; rc = smb3_fs_context_dup(ctx, &tmp); if (rc) { diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 013a4bd65280..651759192280 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -61,8 +61,6 @@ struct cifs_sb_info { /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */ char *prepath; - /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */ - uuid_t dfs_mount_id; /* * Indicate whether serverino option was turned off later * (cifs_autodisable_serverino) in order to match new mounts. diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a99883f16d94..08a73dcb7786 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1233,6 +1233,7 @@ struct cifs_tcon { /* BB add field for back pointer to sb struct(s)? */ #ifdef CONFIG_CIFS_DFS_UPCALL struct list_head ulist; /* cache update list */ + struct list_head dfs_ses_list; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ }; @@ -1749,9 +1750,8 @@ struct cifs_mount_ctx { struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct cifs_ses *root_ses; - uuid_t mount_id; char *origin_fullpath, *leaf_fullpath; + struct list_head dfs_ses_list; }; static inline void free_dfs_info_param(struct dfs_info3_param *param) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5233f14f0636..0eceddde7140 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2229,6 +2229,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) * need to lock before changing something in the session. */ spin_lock(&cifs_tcp_ses_lock); + ses->dfs_root_ses = ctx->dfs_root_ses; list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -3407,7 +3408,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) bool isdfs; int rc; - uuid_gen(&mnt_ctx.mount_id); + INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list); + rc = dfs_mount_share(&mnt_ctx, &isdfs); if (rc) goto error; @@ -3427,7 +3429,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) kfree(cifs_sb->prepath); cifs_sb->prepath = ctx->prepath; ctx->prepath = NULL; - uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); out: cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); @@ -3439,7 +3440,7 @@ out: return rc; error: - dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); + dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list); kfree(mnt_ctx.origin_fullpath); kfree(mnt_ctx.leaf_fullpath); cifs_mount_put_conns(&mnt_ctx); @@ -3637,9 +3638,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) spin_unlock(&cifs_sb->tlink_tree_lock); kfree(cifs_sb->prepath); -#ifdef CONFIG_CIFS_DFS_UPCALL - dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id); -#endif call_rcu(&cifs_sb->rcu, delayed_free); } diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index b64d20374b9c..c8bda52fa096 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -95,25 +95,31 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) ctx->leaf_fullpath = (char *)full_path; rc = cifs_mount_get_session(mnt_ctx); ctx->leaf_fullpath = NULL; - if (!rc) { - struct cifs_ses *ses = mnt_ctx->ses; - mutex_lock(&ses->session_mutex); - ses->dfs_root_ses = mnt_ctx->root_ses; - mutex_unlock(&ses->session_mutex); - } return rc; } -static void set_root_ses(struct cifs_mount_ctx *mnt_ctx) +static int get_root_smb_session(struct cifs_mount_ctx *mnt_ctx) { - if (mnt_ctx->ses) { + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct dfs_root_ses *root_ses; + struct cifs_ses *ses = mnt_ctx->ses; + + if (ses) { + root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL); + if (!root_ses) + return -ENOMEM; + + INIT_LIST_HEAD(&root_ses->list); + spin_lock(&cifs_tcp_ses_lock); - mnt_ctx->ses->ses_count++; + ses->ses_count++; spin_unlock(&cifs_tcp_ses_lock); - dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses); + root_ses->ses = ses; + list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list); } - mnt_ctx->root_ses = mnt_ctx->ses; + ctx->dfs_root_ses = ses; + return 0; } static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path, @@ -121,7 +127,8 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct dfs_info3_param ref = {}; - int rc; + bool is_refsrv = false; + int rc, rc2; rc = dfs_cache_get_tgt_referral(ref_path + 1, tit, &ref); if (rc) @@ -136,8 +143,7 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co if (rc) goto out; - if (ref.flags & DFSREF_REFERRAL_SERVER) - set_root_ses(mnt_ctx); + is_refsrv = !!(ref.flags & DFSREF_REFERRAL_SERVER); rc = -EREMOTE; if (ref.flags & DFSREF_STORAGE_SERVER) { @@ -146,13 +152,17 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co goto out; /* some servers may not advertise referral capability under ref.flags */ - if (!(ref.flags & DFSREF_REFERRAL_SERVER) && - is_tcon_dfs(mnt_ctx->tcon)) - set_root_ses(mnt_ctx); + is_refsrv |= is_tcon_dfs(mnt_ctx->tcon); rc = cifs_is_path_remote(mnt_ctx); } + if (rc == -EREMOTE && is_refsrv) { + rc2 = get_root_smb_session(mnt_ctx); + if (rc2) + rc = rc2; + } + out: free_dfs_info_param(&ref); return rc; @@ -165,6 +175,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) char *ref_path = NULL, *full_path = NULL; struct dfs_cache_tgt_iterator *tit; struct TCP_Server_Info *server; + struct cifs_tcon *tcon; char *origin_fullpath = NULL; int num_links = 0; int rc; @@ -234,12 +245,22 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) if (!rc) { server = mnt_ctx->server; + tcon = mnt_ctx->tcon; mutex_lock(&server->refpath_lock); - server->origin_fullpath = origin_fullpath; - server->current_fullpath = server->leaf_fullpath; + if (!server->origin_fullpath) { + server->origin_fullpath = origin_fullpath; + server->current_fullpath = server->leaf_fullpath; + origin_fullpath = NULL; + } mutex_unlock(&server->refpath_lock); - origin_fullpath = NULL; + + if (list_empty(&tcon->dfs_ses_list)) { + list_replace_init(&mnt_ctx->dfs_ses_list, + &tcon->dfs_ses_list); + } else { + dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list); + } } out: @@ -260,7 +281,7 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) rc = get_session(mnt_ctx, NULL); if (rc) return rc; - mnt_ctx->root_ses = mnt_ctx->ses; + ctx->dfs_root_ses = mnt_ctx->ses; /* * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally * try to get an DFS referral (even cached) to determine whether it is an DFS mount. @@ -280,7 +301,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) } *isdfs = true; - set_root_ses(mnt_ctx); + rc = get_root_smb_session(mnt_ctx); + if (rc) + return rc; return __dfs_mount_share(mnt_ctx); } diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h index 344bea6d8bab..13f26e01f7b9 100644 --- a/fs/cifs/dfs.h +++ b/fs/cifs/dfs.h @@ -10,6 +10,11 @@ #include "fs_context.h" #include "cifs_unicode.h" +struct dfs_root_ses { + struct list_head list; + struct cifs_ses *ses; +}; + int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, struct smb3_fs_context *ctx); int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs); @@ -22,9 +27,10 @@ static inline char *dfs_get_path(struct cifs_sb_info *cifs_sb, const char *path) static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *path, struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tl) { + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - return dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, + return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls, cifs_remap(cifs_sb), path, ref, tl); } @@ -43,4 +49,15 @@ static inline char *dfs_get_automount_devname(struct dentry *dentry, void *page) true); } +static inline void dfs_put_root_smb_sessions(struct list_head *head) +{ + struct dfs_root_ses *root, *tmp; + + list_for_each_entry_safe(root, tmp, head, list) { + list_del_init(&root->list); + cifs_put_smb_ses(root->ses); + kfree(root); + } +} + #endif /* _CIFS_DFS_H */ diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index ac86bd0ebd63..1c59811bfa73 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -49,17 +49,6 @@ struct cache_entry { struct cache_dfs_tgt *tgthint; }; -/* List of referral server sessions per dfs mount */ -struct mount_group { - struct list_head list; - uuid_t id; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES]; - int num_sessions; - spinlock_t lock; - struct list_head refresh_list; - struct kref refcount; -}; - static struct kmem_cache *cache_slab __read_mostly; static struct workqueue_struct *dfscache_wq __read_mostly; @@ -76,85 +65,10 @@ static atomic_t cache_count; static struct hlist_head cache_htable[CACHE_HTABLE_SIZE]; static DECLARE_RWSEM(htable_rw_lock); -static LIST_HEAD(mount_group_list); -static DEFINE_MUTEX(mount_group_list_lock); - static void refresh_cache_worker(struct work_struct *work); static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker); -static void __mount_group_release(struct mount_group *mg) -{ - int i; - - for (i = 0; i < mg->num_sessions; i++) - cifs_put_smb_ses(mg->sessions[i]); - kfree(mg); -} - -static void mount_group_release(struct kref *kref) -{ - struct mount_group *mg = container_of(kref, struct mount_group, refcount); - - mutex_lock(&mount_group_list_lock); - list_del(&mg->list); - mutex_unlock(&mount_group_list_lock); - __mount_group_release(mg); -} - -static struct mount_group *find_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - list_for_each_entry(mg, &mount_group_list, list) { - if (uuid_equal(&mg->id, id)) - return mg; - } - return ERR_PTR(-ENOENT); -} - -static struct mount_group *__get_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - mg = find_mount_group_locked(id); - if (!IS_ERR(mg)) - return mg; - - mg = kmalloc(sizeof(*mg), GFP_KERNEL); - if (!mg) - return ERR_PTR(-ENOMEM); - kref_init(&mg->refcount); - uuid_copy(&mg->id, id); - mg->num_sessions = 0; - spin_lock_init(&mg->lock); - list_add(&mg->list, &mount_group_list); - return mg; -} - -static struct mount_group *get_mount_group(const uuid_t *id) -{ - struct mount_group *mg; - - mutex_lock(&mount_group_list_lock); - mg = __get_mount_group_locked(id); - if (!IS_ERR(mg)) - kref_get(&mg->refcount); - mutex_unlock(&mount_group_list_lock); - - return mg; -} - -static void free_mount_group_list(void) -{ - struct mount_group *mg, *tmp_mg; - - list_for_each_entry_safe(mg, tmp_mg, &mount_group_list, list) { - list_del_init(&mg->list); - __mount_group_release(mg); - } -} - /** * dfs_cache_canonical_path - get a canonical DFS path * @@ -704,7 +618,6 @@ void dfs_cache_destroy(void) { cancel_delayed_work_sync(&refresh_task); unload_nls(cache_cp); - free_mount_group_list(); flush_cache_ents(); kmem_cache_destroy(cache_slab); destroy_workqueue(dfscache_wq); @@ -1111,54 +1024,6 @@ out_unlock: return rc; } -/** - * dfs_cache_add_refsrv_session - add SMB session of referral server - * - * @mount_id: mount group uuid to lookup. - * @ses: reference counted SMB session of referral server. - */ -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses) -{ - struct mount_group *mg; - - if (WARN_ON_ONCE(!mount_id || uuid_is_null(mount_id) || !ses)) - return; - - mg = get_mount_group(mount_id); - if (WARN_ON_ONCE(IS_ERR(mg))) - return; - - spin_lock(&mg->lock); - if (mg->num_sessions < ARRAY_SIZE(mg->sessions)) - mg->sessions[mg->num_sessions++] = ses; - spin_unlock(&mg->lock); - kref_put(&mg->refcount, mount_group_release); -} - -/** - * dfs_cache_put_refsrv_sessions - put all referral server sessions - * - * Put all SMB sessions from the given mount group id. - * - * @mount_id: mount group uuid to lookup. - */ -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id) -{ - struct mount_group *mg; - - if (!mount_id || uuid_is_null(mount_id)) - return; - - mutex_lock(&mount_group_list_lock); - mg = find_mount_group_locked(mount_id); - if (IS_ERR(mg)) { - mutex_unlock(&mount_group_list_lock); - return; - } - mutex_unlock(&mount_group_list_lock); - kref_put(&mg->refcount, mount_group_release); -} - /* Extract share from DFS target and return a pointer to prefix path or NULL */ static const char *parse_target_share(const char *target, char **share) { @@ -1384,11 +1249,6 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) cifs_dbg(FYI, "%s: not a dfs mount\n", __func__); return 0; } - - if (uuid_is_null(&cifs_sb->dfs_mount_id)) { - cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__); - return -EINVAL; - } /* * After reconnecting to a different server, unique ids won't match anymore, so we disable * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index be3b5a44cf82..e0d39393035a 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -40,8 +40,6 @@ int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iter struct dfs_info3_param *ref); int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share, char **prefix); -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id); -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses); char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap); int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 44cb5639ed3b..1b8d4e27f831 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -265,6 +265,7 @@ struct smb3_fs_context { bool rootfs:1; /* if it's a SMB root file system */ bool witness:1; /* use witness protocol */ char *leaf_fullpath; + struct cifs_ses *dfs_root_ses; }; extern const struct fs_parameter_spec smb3_fs_parameters[]; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index a0d286ee723d..b44fb51968bf 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -22,6 +22,7 @@ #ifdef CONFIG_CIFS_DFS_UPCALL #include "dns_resolve.h" #include "dfs_cache.h" +#include "dfs.h" #endif #include "fs_context.h" #include "cached_dir.h" @@ -134,6 +135,9 @@ tconInfoAlloc(void) spin_lock_init(&ret_buf->stat_lock); atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_LIST_HEAD(&ret_buf->dfs_ses_list); +#endif return ret_buf; } @@ -149,6 +153,9 @@ tconInfoFree(struct cifs_tcon *tcon) atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); kfree_sensitive(tcon->password); +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_put_root_smb_sessions(&tcon->dfs_ses_list); +#endif kfree(tcon); } @@ -1255,6 +1262,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, * removing cached DFS targets that the client would eventually * need during failover. */ + ses = CIFS_DFS_ROOT_SES(ses); if (ses->server->ops->get_dfs_refer && !ses->server->ops->get_dfs_refer(xid, ses, ref_path, &refs, &num_refs, cifs_sb->local_nls, diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 9b956294e864..8dd3791b5c53 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -234,15 +234,32 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, size[0] = 8; /* sizeof __le64 */ data[0] = ptr; - rc = SMB2_set_info_init(tcon, server, - &rqst[num_rqst], COMPOUND_FID, - COMPOUND_FID, current->tgid, - FILE_END_OF_FILE_INFORMATION, - SMB2_O_INFO_FILE, 0, data, size); + if (cfile) { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, + FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + } else { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + COMPOUND_FID, + COMPOUND_FID, + current->tgid, + FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + if (!rc) { + smb2_set_next_command(tcon, &rqst[num_rqst]); + smb2_set_related(&rqst[num_rqst]); + } + } if (rc) goto finished; - smb2_set_next_command(tcon, &rqst[num_rqst]); - smb2_set_related(&rqst[num_rqst++]); + num_rqst++; trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path); break; case SMB2_OP_SET_INFO: diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 381babc1212c..d827b7547ffa 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -425,7 +425,7 @@ generate_smb3signingkey(struct cifs_ses *ses, /* safe to access primary channel, since it will never go away */ spin_lock(&ses->chan_lock); - memcpy(ses->chans[0].signkey, ses->smb3signingkey, + memcpy(ses->chans[chan_index].signkey, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); spin_unlock(&ses->chan_lock); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b42050c68e6c..24bdd5f4d3bc 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -278,7 +278,7 @@ static int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst) { - int rc = 0; + int rc; struct kvec *iov; int n_vec; unsigned int send_length = 0; @@ -289,6 +289,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, struct msghdr smb_msg = {}; __be32 rfc1002_marker; + cifs_in_send_inc(server); if (cifs_rdma_enabled(server)) { /* return -EAGAIN when connecting or reconnecting */ rc = -EAGAIN; @@ -297,14 +298,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, goto smbd_done; } + rc = -EAGAIN; if (ssocket == NULL) - return -EAGAIN; + goto out; + rc = -ERESTARTSYS; if (fatal_signal_pending(current)) { cifs_dbg(FYI, "signal pending before send request\n"); - return -ERESTARTSYS; + goto out; } + rc = 0; /* cork the socket */ tcp_sock_set_cork(ssocket->sk, true); @@ -407,7 +411,8 @@ smbd_done: rc); else if (rc > 0) rc = 0; - +out: + cifs_in_send_dec(server); return rc; } @@ -826,9 +831,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, * I/O response may come back and free the mid entry on another thread. */ cifs_save_when_sent(mid); - cifs_in_send_inc(server); rc = smb_send_rqst(server, 1, rqst, flags); - cifs_in_send_dec(server); if (rc < 0) { revert_current_mid(server, mid->credits); @@ -1144,9 +1147,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, else midQ[i]->callback = cifs_compound_last_callback; } - cifs_in_send_inc(server); rc = smb_send_rqst(server, num_rqst, rqst, flags); - cifs_in_send_dec(server); for (i = 0; i < num_rqst; i++) cifs_save_when_sent(midQ[i]); @@ -1396,9 +1397,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, midQ->mid_state = MID_REQUEST_SUBMITTED; - cifs_in_send_inc(server); rc = smb_send(server, in_buf, len); - cifs_in_send_dec(server); cifs_save_when_sent(midQ); if (rc < 0) @@ -1539,9 +1538,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, } midQ->mid_state = MID_REQUEST_SUBMITTED; - cifs_in_send_inc(server); rc = smb_send(server, in_buf, len); - cifs_in_send_dec(server); cifs_save_when_sent(midQ); if (rc < 0) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 78086f8dbda5..13d336a6cc5d 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -92,6 +92,8 @@ void fscrypt_put_master_key_activeref(struct super_block *sb, * destroying any subkeys embedded in it. */ + if (WARN_ON(!sb->s_master_keys)) + return; spin_lock(&sb->s_master_keys->lock); hlist_del_rcu(&mk->mk_node); spin_unlock(&sb->s_master_keys->lock); @@ -207,10 +209,11 @@ static int allocate_filesystem_keyring(struct super_block *sb) * Release all encryption keys that have been added to the filesystem, along * with the keyring that contains them. * - * This is called at unmount time. The filesystem's underlying block device(s) - * are still available at this time; this is important because after user file - * accesses have been allowed, this function may need to evict keys from the - * keyslots of an inline crypto engine, which requires the block device(s). + * This is called at unmount time, after all potentially-encrypted inodes have + * been evicted. The filesystem's underlying block device(s) are still + * available at this time; this is important because after user file accesses + * have been allowed, this function may need to evict keys from the keyslots of + * an inline crypto engine, which requires the block device(s). */ void fscrypt_destroy_keyring(struct super_block *sb) { @@ -227,12 +230,12 @@ void fscrypt_destroy_keyring(struct super_block *sb) hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { /* - * Since all inodes were already evicted, every key - * remaining in the keyring should have an empty inode - * list, and should only still be in the keyring due to - * the single active ref associated with ->mk_secret. - * There should be no structural refs beyond the one - * associated with the active ref. + * Since all potentially-encrypted inodes were already + * evicted, every key remaining in the keyring should + * have an empty inode list, and should only still be in + * the keyring due to the single active ref associated + * with ->mk_secret. There should be no structural refs + * beyond the one associated with the active ref. */ WARN_ON(refcount_read(&mk->mk_active_refs) != 1); WARN_ON(refcount_read(&mk->mk_struct_refs) != 1); diff --git a/fs/erofs/data.c b/fs/erofs/data.c index e16545849ea7..c08c0f578bc6 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -376,7 +376,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (bdev) blksize_mask = bdev_logical_block_size(bdev) - 1; else - blksize_mask = (1 << inode->i_blkbits) - 1; + blksize_mask = i_blocksize(inode) - 1; if ((iocb->ki_pos | iov_iter_count(to) | iov_iter_alignment(to)) & blksize_mask) diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 091fd5adf818..d38e19c11270 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -47,7 +47,7 @@ void z_erofs_lzma_exit(void) } } -int z_erofs_lzma_init(void) +int __init z_erofs_lzma_init(void) { unsigned int i; @@ -278,7 +278,7 @@ again: } } if (no < nrpages_out && strm->buf.out) - kunmap(rq->in[no]); + kunmap(rq->out[no]); if (ni < nrpages_in) kunmap(rq->in[ni]); /* 4. push back LZMA stream context to the global list */ diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 3f3561d37d1b..1db018f8c2e8 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -486,7 +486,7 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count) void *erofs_get_pcpubuf(unsigned int requiredpages); void erofs_put_pcpubuf(void *ptr); int erofs_pcpubuf_growsize(unsigned int nrpages); -void erofs_pcpubuf_init(void); +void __init erofs_pcpubuf_init(void); void erofs_pcpubuf_exit(void); int erofs_register_sysfs(struct super_block *sb); @@ -545,7 +545,7 @@ static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_ZIP_LZMA -int z_erofs_lzma_init(void); +int __init z_erofs_lzma_init(void); void z_erofs_lzma_exit(void); int z_erofs_load_lzma_config(struct super_block *sb, struct erofs_super_block *dsb, diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c index a2efd833d1b6..c7a4b1d77069 100644 --- a/fs/erofs/pcpubuf.c +++ b/fs/erofs/pcpubuf.c @@ -114,7 +114,7 @@ out: return ret; } -void erofs_pcpubuf_init(void) +void __init erofs_pcpubuf_init(void) { int cpu; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 3247d2422bea..f1708c77a991 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1312,12 +1312,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, if (!be->decompressed_pages) be->decompressed_pages = - kcalloc(be->nr_pages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kvcalloc(be->nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); if (!be->compressed_pages) be->compressed_pages = - kcalloc(pclusterpages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kvcalloc(pclusterpages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); z_erofs_parse_out_bvecs(be); err2 = z_erofs_parse_in_bvecs(be, &overlapped); @@ -1365,7 +1365,7 @@ out: } if (be->compressed_pages < be->onstack_pages || be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES) - kfree(be->compressed_pages); + kvfree(be->compressed_pages); z_erofs_fill_other_copies(be, err); for (i = 0; i < be->nr_pages; ++i) { @@ -1384,7 +1384,7 @@ out: } if (be->decompressed_pages != be->onstack_pages) - kfree(be->decompressed_pages); + kvfree(be->decompressed_pages); pcl->length = 0; pcl->partial = true; diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 8bf6d30518b6..655da4d739cb 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -757,9 +757,6 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, err = z_erofs_do_map_blocks(inode, map, flags); out: trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); - - /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ - DBG_BUGON(err < 0 && err != -ENOMEM); return err; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4eeb02d456a9..08b29c289da4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1387,7 +1387,7 @@ struct ext4_super_block { __le32 s_first_meta_bg; /* First metablock block group */ __le32 s_mkfs_time; /* When the filesystem was created */ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ - /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ + /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */ /*150*/ __le32 s_blocks_count_hi; /* Blocks count */ __le32 s_r_blocks_count_hi; /* Reserved blocks count */ __le32 s_free_blocks_count_hi; /* Free blocks count */ diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 4493ef0c715e..cdf9bfe10137 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb, keys[0].fmr_physical = bofs; if (keys[1].fmr_physical >= eofs) keys[1].fmr_physical = eofs - 1; + if (keys[1].fmr_physical < keys[0].fmr_physical) + return 0; start_fsb = keys[0].fmr_physical; end_fsb = keys[1].fmr_physical; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 2b42ececa46d..1602d74b5eeb 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -159,7 +159,6 @@ int ext4_find_inline_data_nolock(struct inode *inode) (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); - ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } out: brelse(is.iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d251d705c276..bf0b7dea4900 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4797,8 +4797,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode, if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { + int err; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); - return ext4_find_inline_data_nolock(inode); + err = ext4_find_inline_data_nolock(inode); + if (!err && ext4_has_inline_data(inode)) + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + return err; } else EXT4_I(inode)->i_inline_off = 0; return 0; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 12435d61f09e..f9a430152063 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -431,6 +431,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ei_bl->i_flags = 0; inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); + EXT4_I(inode_bl)->i_disksize = inode_bl->i_size; inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 94608b7df7e8..a5010b5b8a8c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1595,11 +1595,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, int has_inline_data = 1; ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); - if (has_inline_data) { - if (inlined) - *inlined = 1; + if (inlined) + *inlined = has_inline_data; + if (has_inline_data) goto cleanup_and_exit; - } } if ((namelen <= 2) && (name[0] == '.') && @@ -3646,7 +3645,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, * so the old->de may no longer valid and need to find it again * before reset old inode info. */ - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); if (IS_ERR(old.bh)) retval = PTR_ERR(old.bh); if (!old.bh) @@ -3813,9 +3813,20 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, return retval; } - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); - if (IS_ERR(old.bh)) - return PTR_ERR(old.bh); + /* + * We need to protect against old.inode directory getting converted + * from inline directory format into a normal one. + */ + if (S_ISDIR(old.inode->i_mode)) + inode_lock_nested(old.inode, I_MUTEX_NONDIR2); + + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); + if (IS_ERR(old.bh)) { + retval = PTR_ERR(old.bh); + goto unlock_moved_dir; + } + /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -3872,16 +3883,9 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } - /* - * We need to protect against old.inode directory getting - * converted from inline directory format into a normal one. - */ - inode_lock_nested(old.inode, I_MUTEX_NONDIR2); retval = ext4_rename_dir_prepare(handle, &old); - if (retval) { - inode_unlock(old.inode); + if (retval) goto end_rename; - } } /* * If we're renaming a file within an inline_data dir and adding or @@ -4013,12 +4017,15 @@ end_rename: } else { ext4_journal_stop(handle); } - if (old.dir_bh) - inode_unlock(old.inode); release_bh: brelse(old.dir_bh); brelse(old.bh); brelse(new.bh); + +unlock_moved_dir: + if (S_ISDIR(old.inode->i_mode)) + inode_unlock(old.inode); + return retval; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index beaec6d81074..1e4db96a04e6 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -409,7 +409,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io, static void io_submit_add_bh(struct ext4_io_submit *io, struct inode *inode, - struct page *page, + struct page *pagecache_page, + struct page *bounce_page, struct buffer_head *bh) { int ret; @@ -421,10 +422,11 @@ submit_and_retry: } if (io->io_bio == NULL) io_submit_init_bio(io, bh); - ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); + ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page, + bh->b_size, bh_offset(bh)); if (ret != bh->b_size) goto submit_and_retry; - wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size); + wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size); io->io_next_block++; } @@ -561,8 +563,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { if (!buffer_async_write(bh)) continue; - io_submit_add_bh(io, inode, - bounce_page ? bounce_page : page, bh); + io_submit_add_bh(io, inode, page, bounce_page, bh); } while ((bh = bh->b_this_page) != head); unlock: unlock_page(page); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 88f7b8a88c76..f43e526112ae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5726,6 +5726,28 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, return journal_inode; } +static int ext4_journal_bmap(journal_t *journal, sector_t *block) +{ + struct ext4_map_blocks map; + int ret; + + if (journal->j_inode == NULL) + return 0; + + map.m_lblk = *block; + map.m_len = 1; + ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0); + if (ret <= 0) { + ext4_msg(journal->j_inode->i_sb, KERN_CRIT, + "journal bmap failed: block %llu ret %d\n", + *block, ret); + jbd2_journal_abort(journal, ret ? ret : -EIO); + return ret; + } + *block = map.m_pblk; + return 0; +} + static journal_t *ext4_get_journal(struct super_block *sb, unsigned int journal_inum) { @@ -5746,6 +5768,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, return NULL; } journal->j_private = sb; + journal->j_bmap = ext4_journal_bmap; ext4_init_journal_params(sb, journal); return journal; } @@ -5920,6 +5943,7 @@ static int ext4_load_journal(struct super_block *sb, err = jbd2_journal_wipe(journal, !really_read_only); if (!err) { char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); + if (save) memcpy(save, ((char *) es) + EXT4_S_ERR_START, EXT4_S_ERR_LEN); @@ -5928,6 +5952,14 @@ static int ext4_load_journal(struct super_block *sb, memcpy(((char *) es) + EXT4_S_ERR_START, save, EXT4_S_ERR_LEN); kfree(save); + es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state & + EXT4_ERROR_FS); + /* Write out restored error information to the superblock */ + if (!bdev_read_only(sb->s_bdev)) { + int err2; + err2 = ext4_commit_super(sb); + err = err ? : err2; + } } if (err) { @@ -6157,11 +6189,13 @@ static int ext4_clear_journal_err(struct super_block *sb, errstr = ext4_decode_error(sb, j_errno, nbuf); ext4_warning(sb, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, "Marking fs in need of filesystem check."); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb); + j_errno = ext4_commit_super(sb); + if (j_errno) + return j_errno; + ext4_warning(sb, "Marked fs in need of filesystem check."); jbd2_journal_clear_err(journal); jbd2_journal_update_sb_errno(journal); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index e2b8b3437c58..12d6252e3e22 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -501,13 +501,13 @@ static const struct sysfs_ops ext4_attr_ops = { .store = ext4_attr_store, }; -static struct kobj_type ext4_sb_ktype = { +static const struct kobj_type ext4_sb_ktype = { .default_groups = ext4_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_sb_release, }; -static struct kobj_type ext4_feat_ktype = { +static const struct kobj_type ext4_feat_ktype = { .default_groups = ext4_feat_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_feat_release, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 62f2ec599218..767454d74cd6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2852,6 +2852,9 @@ shift: (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; + if (ext4_has_inline_data(inode)) + error = ext4_find_inline_data_nolock(inode); + cleanup: if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", diff --git a/fs/file.c b/fs/file.c index c942c89ca4cd..7893ea161d77 100644 --- a/fs/file.c +++ b/fs/file.c @@ -642,6 +642,7 @@ static struct file *pick_file(struct files_struct *files, unsigned fd) if (fd >= fdt->max_fds) return NULL; + fd = array_index_nospec(fd, fdt->max_fds); file = fdt->fd[fd]; if (file) { rcu_assign_pointer(fdt->fd[fd], NULL); diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 6fe9ca253b70..2e215e8c3c88 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -83,8 +83,26 @@ static int gfs2_dhash(const struct dentry *dentry, struct qstr *str) return 0; } +static int gfs2_dentry_delete(const struct dentry *dentry) +{ + struct gfs2_inode *ginode; + + if (d_really_is_negative(dentry)) + return 0; + + ginode = GFS2_I(d_inode(dentry)); + if (!gfs2_holder_initialized(&ginode->i_iopen_gh)) + return 0; + + if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags)) + return 1; + + return 0; +} + const struct dentry_operations gfs2_dops = { .d_revalidate = gfs2_drevalidate, .d_hash = gfs2_dhash, + .d_delete = gfs2_dentry_delete, }; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e80c781731f8..8ae419152ff6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -969,10 +969,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, { int err = 0; unsigned long long ret; - sector_t block = 0; + sector_t block = blocknr; - if (journal->j_inode) { - block = blocknr; + if (journal->j_bmap) { + err = journal->j_bmap(journal, &block); + if (err == 0) + *retp = block; + } else if (journal->j_inode) { ret = bmap(journal->j_inode, &block); if (ret || !block) { diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 6e61b5bc7d86..cead696b656a 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -727,8 +727,9 @@ static int generate_key(struct ksmbd_conn *conn, struct ksmbd_session *sess, goto smb3signkey_ret; } - if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || - conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) + if (key_size == SMB3_ENC_DEC_KEY_SIZE && + (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || + conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L256, 4); else rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L128, 4); diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 5b10b03800c1..115a67d2cf78 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -298,7 +298,7 @@ int ksmbd_conn_handler_loop(void *p) kvfree(conn->request_buf); conn->request_buf = NULL; - size = t->ops->read(t, hdr_buf, sizeof(hdr_buf)); + size = t->ops->read(t, hdr_buf, sizeof(hdr_buf), -1); if (size != sizeof(hdr_buf)) break; @@ -319,13 +319,10 @@ int ksmbd_conn_handler_loop(void *p) } /* - * Check if pdu size is valid (min : smb header size, - * max : 0x00FFFFFF). + * Check maximum pdu size(0x00FFFFFF). */ - if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE || - pdu_size > MAX_STREAM_PROT_LEN) { + if (pdu_size > MAX_STREAM_PROT_LEN) break; - } /* 4 for rfc1002 length field */ size = pdu_size + 4; @@ -344,7 +341,7 @@ int ksmbd_conn_handler_loop(void *p) * We already read 4 bytes to find out PDU size, now * read in PDU */ - size = t->ops->read(t, conn->request_buf + 4, pdu_size); + size = t->ops->read(t, conn->request_buf + 4, pdu_size, 2); if (size < 0) { pr_err("sock_read failed: %d\n", size); break; diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h index 3643354a3fa7..0e3a848defaf 100644 --- a/fs/ksmbd/connection.h +++ b/fs/ksmbd/connection.h @@ -114,7 +114,8 @@ struct ksmbd_transport_ops { int (*prepare)(struct ksmbd_transport *t); void (*disconnect)(struct ksmbd_transport *t); void (*shutdown)(struct ksmbd_transport *t); - int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size); + int (*read)(struct ksmbd_transport *t, char *buf, + unsigned int size, int max_retries); int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov, int size, bool need_invalidate_rkey, unsigned int remote_key); diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 0685c1c77b9f..97c9d1b5bcc0 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2977,8 +2977,11 @@ int smb2_open(struct ksmbd_work *work) sizeof(struct smb_acl) + sizeof(struct smb_ace) * ace_num * 2, GFP_KERNEL); - if (!pntsd) + if (!pntsd) { + posix_acl_release(fattr.cf_acls); + posix_acl_release(fattr.cf_dacls); goto err_out; + } rc = build_sec_desc(idmap, pntsd, NULL, 0, @@ -4934,6 +4937,10 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps); + if (test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_STREAMS)) + info->Attributes |= cpu_to_le32(FILE_NAMED_STREAMS); + info->MaxPathNameComponentLength = cpu_to_le32(stfs.f_namelen); len = smbConvertToUTF16((__le16 *)info->FileSystemName, "NTFS", PATH_MAX, conn->local_nls, 0); @@ -7444,13 +7451,16 @@ static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id, if (in_count == 0) return -EINVAL; + start = le64_to_cpu(qar_req->file_offset); + length = le64_to_cpu(qar_req->length); + + if (start < 0 || length < 0) + return -EINVAL; + fp = ksmbd_lookup_fd_fast(work, id); if (!fp) return -ENOENT; - start = le64_to_cpu(qar_req->file_offset); - length = le64_to_cpu(qar_req->length); - ret = ksmbd_vfs_fqar_lseek(fp, start, length, qar_rsp, in_count, out_count); if (ret && ret != -E2BIG) @@ -7751,7 +7761,7 @@ int smb2_ioctl(struct ksmbd_work *work) off = le64_to_cpu(zero_data->FileOffset); bfz = le64_to_cpu(zero_data->BeyondFinalZero); - if (off > bfz) { + if (off < 0 || bfz < 0 || off > bfz) { ret = -EINVAL; goto out; } diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index fa2b54df6ee6..9c1ce6d199ce 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -434,7 +434,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname, static int __smb2_negotiate(struct ksmbd_conn *conn) { - return (conn->dialect >= SMB21_PROT_ID && + return (conn->dialect >= SMB20_PROT_ID && conn->dialect <= SMB311_PROT_ID); } @@ -442,9 +442,26 @@ static int smb_handle_negotiate(struct ksmbd_work *work) { struct smb_negotiate_rsp *neg_rsp = work->response_buf; - ksmbd_debug(SMB, "Unsupported SMB protocol\n"); - neg_rsp->hdr.Status.CifsError = STATUS_INVALID_LOGON_TYPE; - return -EINVAL; + ksmbd_debug(SMB, "Unsupported SMB1 protocol\n"); + + /* + * Remove 4 byte direct TCP header, add 2 byte bcc and + * 2 byte DialectIndex. + */ + *(__be32 *)work->response_buf = + cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2 + 2); + neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS; + + neg_rsp->hdr.Command = SMB_COM_NEGOTIATE; + *(__le32 *)neg_rsp->hdr.Protocol = SMB1_PROTO_NUMBER; + neg_rsp->hdr.Flags = SMBFLG_RESPONSE; + neg_rsp->hdr.Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS | + SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME; + + neg_rsp->hdr.WordCount = 1; + neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect); + neg_rsp->ByteCount = 0; + return 0; } int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) @@ -465,7 +482,7 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) } } - if (command == SMB2_NEGOTIATE_HE && __smb2_negotiate(conn)) { + if (command == SMB2_NEGOTIATE_HE) { ret = smb2_handle_negotiate(work); init_smb2_neg_rsp(work); return ret; diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index e663ab9ea759..d30ce4c1a151 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -158,8 +158,15 @@ #define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) #define SMB_COM_NEGOTIATE 0x72 - #define SMB1_CLIENT_GUID_SIZE (16) + +#define SMBFLG_RESPONSE 0x80 /* this PDU is a response from server */ + +#define SMBFLG2_IS_LONG_NAME cpu_to_le16(0x40) +#define SMBFLG2_EXT_SEC cpu_to_le16(0x800) +#define SMBFLG2_ERR_STATUS cpu_to_le16(0x4000) +#define SMBFLG2_UNICODE cpu_to_le16(0x8000) + struct smb_hdr { __be32 smb_buf_length; __u8 Protocol[4]; @@ -199,28 +206,7 @@ struct smb_negotiate_req { struct smb_negotiate_rsp { struct smb_hdr hdr; /* wct = 17 */ __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ - __u8 SecurityMode; - __le16 MaxMpxCount; - __le16 MaxNumberVcs; - __le32 MaxBufferSize; - __le32 MaxRawSize; - __le32 SessionKey; - __le32 Capabilities; /* see below */ - __le32 SystemTimeLow; - __le32 SystemTimeHigh; - __le16 ServerTimeZone; - __u8 EncryptionKeyLength; __le16 ByteCount; - union { - unsigned char EncryptionKey[8]; /* cap extended security off */ - /* followed by Domain name - if extended security is off */ - /* followed by 16 bytes of server GUID */ - /* then security blob if cap_extended_security negotiated */ - struct { - unsigned char GUID[SMB1_CLIENT_GUID_SIZE]; - unsigned char SecurityBlob[1]; - } __packed extended_response; - } __packed u; } __packed; struct filesystem_attribute_info { diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 096eda9ef873..c06efc020bd9 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -670,7 +670,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t, } static int smb_direct_read(struct ksmbd_transport *t, char *buf, - unsigned int size) + unsigned int size, int unused) { struct smb_direct_recvmsg *recvmsg; struct smb_direct_data_transfer *data_transfer; diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 603893fd87f5..20e85e2701f2 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -291,16 +291,18 @@ static int ksmbd_tcp_run_kthread(struct interface *iface) /** * ksmbd_tcp_readv() - read data from socket in given iovec - * @t: TCP transport instance - * @iov_orig: base IO vector - * @nr_segs: number of segments in base iov - * @to_read: number of bytes to read from socket + * @t: TCP transport instance + * @iov_orig: base IO vector + * @nr_segs: number of segments in base iov + * @to_read: number of bytes to read from socket + * @max_retries: maximum retry count * * Return: on success return number of bytes read from socket, * otherwise return error number */ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, - unsigned int nr_segs, unsigned int to_read) + unsigned int nr_segs, unsigned int to_read, + int max_retries) { int length = 0; int total_read; @@ -308,7 +310,6 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; - int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -335,14 +336,23 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if ((length == -ERESTARTSYS || length == -EAGAIN) && - max_retry) { + } else if (length == -ERESTARTSYS || length == -EAGAIN) { + /* + * If max_retries is negative, Allow unlimited + * retries to keep connection with inactive sessions. + */ + if (max_retries == 0) { + total_read = length; + break; + } else if (max_retries > 0) { + max_retries--; + } + usleep_range(1000, 2000); length = 0; - max_retry--; continue; } else if (length <= 0) { - total_read = -EAGAIN; + total_read = length; break; } } @@ -358,14 +368,15 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, * Return: on success return number of bytes read from socket, * otherwise return error number */ -static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, unsigned int to_read) +static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, + unsigned int to_read, int max_retries) { struct kvec iov; iov.iov_base = buf; iov.iov_len = to_read; - return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read); + return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read, max_retries); } static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov, diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 7df6324ccb8a..8161667c976f 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -261,7 +261,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) u32 exclusive; int error; __be32 *p; - s32 end; memset(lock, 0, sizeof(*lock)); locks_init_lock(fl); @@ -285,13 +284,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK; p = xdr_decode_hyper(p, &l_offset); xdr_decode_hyper(p, &l_len); - end = l_offset + l_len - 1; - - fl->fl_start = (loff_t)l_offset; - if (l_len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = (loff_t)end; + nlm4svc_set_file_lock_range(fl, l_offset, l_len); error = 0; out: return error; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 712fdfeb8ef0..5fcbf30cd275 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -33,6 +33,17 @@ loff_t_to_s64(loff_t offset) return res; } +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len) +{ + s64 end = off + len - 1; + + fl->fl_start = off; + if (len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = end; +} + /* * NLM file handles are defined by specification to be a variable-length * XDR opaque no longer than 1024 bytes. However, this implementation @@ -80,7 +91,7 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; - + nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len); return true; } diff --git a/fs/locks.c b/fs/locks.c index 66b4eef09db5..df8b26a42524 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1863,9 +1863,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp, void **priv) { struct inode *inode = file_inode(filp); + vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode); int error; - if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) + if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -2425,7 +2426,6 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock) if (flock->l_pid != 0) goto out; - cmd = F_GETLK64; fl->fl_flags |= FL_OFDLCK; fl->fl_owner = filp; } diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 14a72224b657..450d6c3bc05e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -75,7 +75,7 @@ config NFS_V3_ACL config NFS_V4 tristate "NFS client support for NFS version 4" depends on NFS_FS - select SUNRPC_GSS + select RPCSEC_GSS_KRB5 select KEYS help This option enables support for version 4 of the NFS protocol diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a41c3ee4549c..6fbcbb8d6587 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -3089,7 +3089,6 @@ static void nfs_access_add_rbtree(struct inode *inode, else goto found; } - set->timestamp = ktime_get_ns(); rb_link_node(&set->rb_node, parent, p); rb_insert_color(&set->rb_node, root_node); list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); @@ -3114,6 +3113,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set, cache->fsgid = cred->fsgid; cache->group_info = get_group_info(cred->group_info); cache->mask = set->mask; + cache->timestamp = ktime_get_ns(); /* The above field assignments must be visible * before this item appears on the lru. We cannot easily diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c380cff4108e..e90988591df4 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -15,6 +15,7 @@ #include <linux/stat.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/task_io_accounting_ops.h> #include <linux/pagemap.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> @@ -337,6 +338,7 @@ int nfs_read_folio(struct file *file, struct folio *folio) trace_nfs_aop_readpage(inode, folio); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + task_io_account_read(folio_size(folio)); /* * Try to flush any pending writes to the file.. @@ -393,6 +395,7 @@ void nfs_readahead(struct readahead_control *ractl) trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages); nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); + task_io_account_read(readahead_length(ractl)); ret = -ESTALE; if (NFS_STALE(inode)) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 7c441f2bd444..43b88eaf0673 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -73,7 +73,7 @@ config NFSD_V4 bool "NFS server support for NFS version 4" depends on NFSD && PROC_FS select FS_POSIX_ACL - select SUNRPC_GSS + select RPCSEC_GSS_KRB5 select CRYPTO select CRYPTO_MD5 select CRYPTO_SHA256 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e7462b5e5f1e..5783209f17fc 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -941,8 +941,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct page *last_page; last_page = page + (offset + sd->len - 1) / PAGE_SIZE; - for (page += offset / PAGE_SIZE; page <= last_page; page++) + for (page += offset / PAGE_SIZE; page <= last_page; page++) { + /* + * Skip page replacement when extending the contents + * of the current page. + */ + if (page == *(rqstp->rq_next_page - 1)) + continue; svc_rqst_replace_page(rqstp, page); + } if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; @@ -1104,7 +1111,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); + file_start_write(file); host_err = vfs_iter_write(file, &iter, &pos, flags); + file_end_write(file); if (host_err < 0) { nfsd_reset_write_verifier(nn); trace_nfsd_writeverf_reset(nn, rqstp, host_err); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 5ccc638ae92f..1dfbc0c34513 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -71,7 +71,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_index > ~(__u64)0 - argv->v_nmembs) return -EINVAL; - buf = (void *)__get_free_pages(GFP_NOFS, 0); + buf = (void *)get_zeroed_page(GFP_NOFS); if (unlikely(!buf)) return -ENOMEM; maxmembs = PAGE_SIZE / argv->v_size; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1d65f6ef00ca..0394505fdce3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1977,11 +1977,26 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (unlikely(copied < len) && wc->w_target_page) { + loff_t new_isize; + if (!PageUptodate(wc->w_target_page)) copied = 0; - ocfs2_zero_new_buffers(wc->w_target_page, start+copied, - start+len); + new_isize = max_t(loff_t, i_size_read(inode), pos + copied); + if (new_isize > page_offset(wc->w_target_page)) + ocfs2_zero_new_buffers(wc->w_target_page, start+copied, + start+len); + else { + /* + * When page is fully beyond new isize (data copy + * failed), do not bother zeroing the page. Invalidate + * it instead so that writeback does not get confused + * put page & buffer dirty bits into inconsistent + * state. + */ + block_invalidate_folio(page_folio(wc->w_target_page), + 0, PAGE_SIZE); + } } if (wc->w_target_page) flush_dcache_page(wc->w_target_page); diff --git a/fs/splice.c b/fs/splice.c index 2e76dbb81a8f..2c3dec2b6dfa 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -937,7 +937,6 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, /* * Do the splice. */ - ret = 0; bytes = 0; len = sd->total_len; flags = sd->flags; diff --git a/fs/super.c b/fs/super.c index 84332d5cb817..04bc62ab7dfe 100644 --- a/fs/super.c +++ b/fs/super.c @@ -475,13 +475,22 @@ void generic_shutdown_super(struct super_block *sb) cgroup_writeback_umount(); - /* evict all inodes with zero refcount */ + /* Evict all inodes with zero refcount. */ evict_inodes(sb); - /* only nonzero refcount inodes can have marks */ + + /* + * Clean up and evict any inodes that still have references due + * to fsnotify or the security policy. + */ fsnotify_sb_delete(sb); - fscrypt_destroy_keyring(sb); security_sb_delete(sb); + /* + * Now that all potentially-encrypted inodes have been evicted, + * the fscrypt keyring can be destroyed. + */ + fscrypt_destroy_keyring(sb); + if (sb->s_dio_done_wq) { destroy_workqueue(sb->s_dio_done_wq); sb->s_dio_done_wq = NULL; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index f7a9607c2b95..2210e5eb1ea0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -193,7 +193,7 @@ static int udf_adinicb_writepage(struct folio *folio, struct udf_inode_info *iinfo = UDF_I(inode); BUG_ON(!PageLocked(page)); - memcpy_to_page(page, 0, iinfo->i_data + iinfo->i_lenEAttr, + memcpy_from_page(iinfo->i_data + iinfo->i_lenEAttr, page, 0, i_size_read(inode)); unlock_page(page); mark_inode_dirty(inode); @@ -241,6 +241,15 @@ static int udf_read_folio(struct file *file, struct folio *folio) static void udf_readahead(struct readahead_control *rac) { + struct udf_inode_info *iinfo = UDF_I(rac->mapping->host); + + /* + * No readahead needed for in-ICB files and udf_get_block() would get + * confused for such file anyway. + */ + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + return; + mpage_readahead(rac, udf_get_block); } @@ -407,6 +416,9 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map) int err; struct udf_inode_info *iinfo = UDF_I(inode); + if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)) + return -EFSCORRUPTED; + map->oflags = 0; if (!(map->iflags & UDF_MAP_CREATE)) { struct kernel_lb_addr eloc; diff --git a/fs/verity/enable.c b/fs/verity/enable.c index e13db6507b38..7a0e3a84d370 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,7 +8,6 @@ #include "fsverity_private.h" #include <linux/mount.h> -#include <linux/pagemap.h> #include <linux/sched/signal.h> #include <linux/uaccess.h> @@ -367,25 +366,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) goto out_drop_write; err = enable_verity(filp, &arg); - if (err) - goto out_allow_write_access; /* - * Some pages of the file may have been evicted from pagecache after - * being used in the Merkle tree construction, then read into pagecache - * again by another process reading from the file concurrently. Since - * these pages didn't undergo verification against the file digest which - * fs-verity now claims to be enforcing, we have to wipe the pagecache - * to ensure that all future reads are verified. + * We no longer drop the inode's pagecache after enabling verity. This + * used to be done to try to avoid a race condition where pages could be + * evicted after being used in the Merkle tree construction, then + * re-instantiated by a concurrent read. Such pages are unverified, and + * the backing storage could have filled them with different content, so + * they shouldn't be used to fulfill reads once verity is enabled. + * + * But, dropping the pagecache has a big performance impact, and it + * doesn't fully solve the race condition anyway. So for those reasons, + * and also because this race condition isn't very important relatively + * speaking (especially for small-ish files, where the chance of a page + * being used, evicted, *and* re-instantiated all while enabling verity + * is quite small), we no longer drop the inode's pagecache. */ - filemap_write_and_wait(inode->i_mapping); - invalidate_inode_pages2(inode->i_mapping); /* * allow_write_access() is needed to pair with deny_write_access(). * Regardless, the filesystem won't allow writing to verity files. */ -out_allow_write_access: allow_write_access(filp); out_drop_write: mnt_drop_write_file(filp); diff --git a/fs/verity/verify.c b/fs/verity/verify.c index f50e3b5b52c9..e2508222750b 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -387,15 +387,15 @@ EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work); int __init fsverity_init_workqueue(void) { /* - * Use an unbound workqueue to allow bios to be verified in parallel - * even when they happen to complete on the same CPU. This sacrifices - * locality, but it's worthwhile since hashing is CPU-intensive. + * Use a high-priority workqueue to prioritize verification work, which + * blocks reads from completing, over regular application tasks. * - * Also use a high-priority workqueue to prioritize verification work, - * which blocks reads from completing, over regular application tasks. + * For performance reasons, don't use an unbound workqueue. Using an + * unbound workqueue for crypto operations causes excessive scheduler + * latency on ARM64. */ fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue", - WQ_UNBOUND | WQ_HIGHPRI, + WQ_HIGHPRI, num_online_cpus()); if (!fsverity_read_workqueue) return -ENOMEM; diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 738b0e28d74b..617e4f9db42e 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -383,7 +383,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) struct block_device *bdev = inode->i_sb->s_bdev; unsigned int max = bdev_max_zone_append_sectors(bdev); struct bio *bio; - ssize_t size; + ssize_t size = 0; int nr_pages; ssize_t ret; @@ -426,7 +426,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) if (bio->bi_iter.bi_sector != wpsector) { zonefs_warn(inode->i_sb, "Corrupted write pointer %llu for zone at %llu\n", - wpsector, z->z_sector); + bio->bi_iter.bi_sector, z->z_sector); ret = -EIO; } } |