diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/acl.c | 10 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 1 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 26 | ||||
-rw-r--r-- | fs/ext4/dir.c | 1 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 61 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 2 | ||||
-rw-r--r-- | fs/ext4/extents.c | 37 | ||||
-rw-r--r-- | fs/ext4/fast_commit.c | 473 | ||||
-rw-r--r-- | fs/ext4/fast_commit.h | 33 | ||||
-rw-r--r-- | fs/ext4/file.c | 14 | ||||
-rw-r--r-- | fs/ext4/hash.c | 2 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 2 | ||||
-rw-r--r-- | fs/ext4/inline.c | 37 | ||||
-rw-r--r-- | fs/ext4/inode.c | 258 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 332 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 437 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 25 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 1 | ||||
-rw-r--r-- | fs/ext4/namei.c | 41 | ||||
-rw-r--r-- | fs/ext4/orphan.c | 4 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 27 | ||||
-rw-r--r-- | fs/ext4/readpage.c | 14 | ||||
-rw-r--r-- | fs/ext4/resize.c | 26 | ||||
-rw-r--r-- | fs/ext4/super.c | 2047 | ||||
-rw-r--r-- | fs/ext4/sysfs.c | 44 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 6 |
27 files changed, 2471 insertions, 1492 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 0613dfcbfd4a..57e82e25f8e2 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -139,7 +139,7 @@ fail: /* * Inode operation get_posix_acl(). * - * inode->i_mutex: don't care + * inode->i_rwsem: don't care */ struct posix_acl * ext4_get_acl(struct inode *inode, int type, bool rcu) @@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type, bool rcu) /* * Set the access or default ACL of an inode. * - * inode->i_mutex: down unless called from ext4_new_inode + * inode->i_rwsem: down unless called from ext4_new_inode */ static int __ext4_set_acl(handle_t *handle, struct inode *inode, int type, @@ -246,7 +246,6 @@ retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); if (IS_ERR(handle)) return PTR_ERR(handle); - ext4_fc_start_update(inode); if ((type == ACL_TYPE_ACCESS) && acl) { error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); @@ -264,7 +263,6 @@ retry: } out_stop: ext4_journal_stop(handle); - ext4_fc_stop_update(inode); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; return error; @@ -273,8 +271,8 @@ out_stop: /* * Initialize the ACLs of a new inode. Called from ext4_new_inode. * - * dir->i_mutex: down - * inode->i_mutex: up (access to inode is still exclusive) + * dir->i_rwsem: down + * inode->i_rwsem: up (access to inode is still exclusive) */ int ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index a0fb0c4bdc7c..78ee3ef795ae 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -411,6 +411,7 @@ verified: * ext4_read_block_bitmap_nowait() * @sb: super block * @block_group: given block group + * @ignore_locked: ignore locked buffers * * Read the bitmap for a given block_group,and validate the * bits for block/inode/inode tables are set in the bitmaps diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 4666b55b736e..5504f72bbbbe 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -292,15 +292,10 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); } -/* - * Returns 1 if the passed-in block region (start_blk, - * start_blk+count) is valid; 0 if some part of the block region - * overlaps with some other filesystem metadata blocks. - */ -int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, - unsigned int count) +int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_system_blocks *system_blks; struct ext4_system_zone *entry; struct rb_node *n; @@ -329,7 +324,9 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, else if (start_blk >= (entry->start_blk + entry->count)) n = n->rb_right; else { - ret = (entry->ino == inode->i_ino); + ret = 0; + if (inode) + ret = (entry->ino == inode->i_ino); break; } } @@ -338,6 +335,17 @@ out_rcu: return ret; } +/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with some other filesystem metadata blocks. + */ +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, + unsigned int count) +{ + return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count); +} + int ext4_check_blockref(const char *function, unsigned int line, struct inode *inode, __le32 *p, unsigned int max) { diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 74b172a4adda..a6bb86f52b9a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -303,7 +303,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) goto done; brelse(bh); bh = NULL; - offset = 0; } done: err = 0; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 404dd50856e5..3f87cca49f0c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1028,7 +1028,7 @@ struct ext4_inode_info { /* * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention + * data. Taking i_rwsem even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs. @@ -1046,6 +1046,8 @@ struct ext4_inode_info { /* Fast commit related info */ + /* For tracking dentry create updates */ + struct list_head i_fc_dilist; struct list_head i_fc_list; /* * inodes that need fast commit * protected by sbi->s_fc_lock. @@ -1279,7 +1281,7 @@ struct ext4_inode_info { #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le -extern void ext4_set_bits(void *bm, int cur, int len); +extern void mb_set_bits(void *bm, int cur, int len); /* * Maximal mount counts between two filesystem checks @@ -1298,6 +1300,8 @@ extern void ext4_set_bits(void *bm, int cur, int len); /* Metadata checksum algorithm codes */ #define EXT4_CRC32C_CHKSUM 1 +#define EXT4_LABEL_MAX 16 + /* * Structure of the super block */ @@ -1347,7 +1351,7 @@ struct ext4_super_block { /*60*/ __le32 s_feature_incompat; /* incompatible feature set */ __le32 s_feature_ro_compat; /* readonly-compatible feature set */ /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -/*78*/ char s_volume_name[16]; /* volume name */ +/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */ /*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */ /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ /* @@ -1661,7 +1665,7 @@ struct ext4_sb_info { struct task_struct *s_mmp_tsk; /* record the last minlen when FITRIM is called. */ - atomic_t s_last_trim_minblks; + unsigned long s_last_trim_minblks; /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *s_chksum_driver; @@ -1697,6 +1701,7 @@ struct ext4_sb_info { */ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; + u64 s_dax_part_off; #ifdef CONFIG_EXT4_DEBUG unsigned long s_simulate_fail; #endif @@ -1725,9 +1730,9 @@ struct ext4_sb_info { */ struct work_struct s_error_work; - /* Ext4 fast commit stuff */ + /* Ext4 fast commit sub transaction ID */ atomic_t s_fc_subtid; - atomic_t s_fc_ineligible_updates; + /* * After commit starts, the main queue gets locked, and the further * updates get added in the staging queue. @@ -1747,7 +1752,7 @@ struct ext4_sb_info { spinlock_t s_fc_lock; struct buffer_head *s_fc_bh; struct ext4_fc_stats s_fc_stats; - u64 s_fc_avg_commit_time; + tid_t s_fc_ineligible_tid; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif @@ -1793,10 +1798,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) enum { EXT4_MF_MNTDIR_SAMPLED, EXT4_MF_FS_ABORTED, /* Fatal error detected */ - EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ - EXT4_MF_FC_COMMITTING /* File system underoing a fast - * commit. - */ + EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ }; static inline void ext4_set_mount_flag(struct super_block *sb, int bit) @@ -2399,8 +2401,7 @@ ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) { - if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) - BUG(); + BUG_ON((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)); #if (PAGE_SIZE >= 65536) if (len < 65536) return cpu_to_le16(len); @@ -2484,7 +2485,7 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct fscrypt_str cf_name; #endif }; @@ -2720,7 +2721,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct ext4_filename *fname); @@ -2753,7 +2754,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif return err; @@ -2772,7 +2773,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); #endif return err; @@ -2789,7 +2790,7 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->usr_fname = NULL; fname->disk_name.name = NULL; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif @@ -2805,7 +2806,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif @@ -2821,7 +2822,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, static inline void ext4_fname_free_filename(struct ext4_filename *fname) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif @@ -2925,9 +2926,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_inode(handle_t *handle, struct inode *inode); -void ext4_fc_mark_ineligible(struct super_block *sb, int reason); -void ext4_fc_start_ineligible(struct super_block *sb, int reason); -void ext4_fc_stop_ineligible(struct super_block *sb); +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle); void ext4_fc_start_update(struct inode *inode); void ext4_fc_stop_update(struct inode *inode); void ext4_fc_del(struct inode *inode); @@ -2935,6 +2934,10 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block); void ext4_fc_replay_cleanup(struct super_block *sb); int ext4_fc_commit(journal_t *journal, tid_t commit_tid); int __init ext4_fc_init_dentry_cache(void); +void ext4_fc_destroy_dentry_cache(void); +int ext4_fc_record_regions(struct super_block *sb, int ino, + ext4_lblk_t lblk, ext4_fsblk_t pblk, + int len, int replay); /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; @@ -3096,6 +3099,9 @@ extern int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_fsblk_t n_blocks_count); extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); +extern unsigned int ext4_list_backups(struct super_block *sb, + unsigned int *three, unsigned int *five, + unsigned int *seven); /* super.c */ extern struct buffer_head *ext4_sb_bread(struct super_block *sb, @@ -3110,6 +3116,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait); extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block); extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); +extern __le32 ext4_superblock_csum(struct super_block *sb, + struct ext4_super_block *es); extern void ext4_superblock_csum_set(struct super_block *sb); extern int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup); @@ -3402,7 +3410,7 @@ do { \ #define EXT4_FREECLUSTERS_WATERMARK 0 #endif -/* Update i_disksize. Requires i_mutex to avoid races with truncate */ +/* Update i_disksize. Requires i_rwsem to avoid races with truncate */ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) { WARN_ON_ONCE(S_ISREG(inode->i_mode) && @@ -3413,7 +3421,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) up_write(&EXT4_I(inode)->i_data_sem); } -/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */ +/* Update i_size, i_disksize. Requires i_rwsem to avoid races with truncate */ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) { int changed = 0; @@ -3701,6 +3709,9 @@ extern int ext4_inode_block_valid(struct inode *inode, unsigned int count); extern int ext4_check_blockref(const char *, unsigned int, struct inode *, __le32 *, unsigned int); +extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode, + ext4_fsblk_t start_blk, unsigned int count); + /* extents.c */ struct ext4_ext_path; diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6def7339056d..3477a16d08ae 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -162,6 +162,8 @@ int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, { if (!ext4_handle_valid(handle)) return 0; + if (is_handle_aborted(handle)) + return -EROFS; if (jbd2_handle_buffer_credits(handle) >= check_cred && handle->h_revoke_credits >= revoke_cred) return 0; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 0e4fa644df01..db2ae4a2b38d 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -491,7 +491,7 @@ static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks) /* * This function controls whether or not we should try to go down the * dioread_nolock code paths, which makes it safe to avoid taking - * i_mutex for direct I/O reads. This only works for extent-based + * i_rwsem for direct I/O reads. This only works for extent-based * files, and it doesn't work if data journaling is enabled, since the * dioread_nolock code uses b_private to pass information back to the * I/O completion handler, and this conflicts with the jbd's use of diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0ecf819bf189..0d98cf402282 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -27,8 +27,8 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/fiemap.h> -#include <linux/backing-dev.h> #include <linux/iomap.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4_extents.h" #include "xattr.h" @@ -97,7 +97,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by - * i_mutex. So we can safely drop the i_data_sem here. + * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); ext4_discard_preallocations(inode, 0); @@ -1496,8 +1496,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", ix != NULL ? le32_to_cpu(ix->ei_block) : 0, - EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? - le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, + le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block), depth); return -EFSCORRUPTED; } @@ -2025,7 +2024,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; nearex = ex; goto merge; } @@ -2054,7 +2052,6 @@ prepend: + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; nearex = ex; goto merge; } @@ -3371,7 +3368,6 @@ static int ext4_split_extent(handle_t *handle, return -EFSCORRUPTED; } unwritten = ext4_ext_is_unwritten(ex); - split_flag1 = 0; if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; @@ -4407,8 +4403,7 @@ retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); if (err == -ENOMEM) { - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_ATOMIC); goto retry; } if (err) @@ -4416,8 +4411,7 @@ retry: retry_remove_space: err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); if (err == -ENOMEM) { - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_ATOMIC); goto retry_remove_space; } return err; @@ -4577,7 +4571,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); /* Preallocate the range including the unaligned edges */ @@ -4647,8 +4641,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) goto out_handle; - ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits, - (offset + len - 1) >> inode->i_sb->s_blocksize_bits); /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); if (ret >= 0) @@ -4697,8 +4689,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; - ext4_fc_start_update(inode); - if (mode & FALLOC_FL_PUNCH_HOLE) { ret = ext4_punch_hole(inode, offset, len); goto exit; @@ -4747,7 +4737,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); @@ -4762,7 +4752,6 @@ out: inode_unlock(inode); trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); exit: - ext4_fc_stop_update(inode); return ret; } @@ -5344,7 +5333,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode, 0); @@ -5383,7 +5372,6 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); - ext4_fc_stop_ineligible(sb); out_mmap: filemap_invalidate_unlock(mapping); out_mutex: @@ -5485,7 +5473,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; @@ -5560,7 +5548,6 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); - ext4_fc_stop_ineligible(sb); out_mmap: filemap_invalidate_unlock(mapping); out_mutex: @@ -5583,7 +5570,7 @@ out_mutex: * stuff such as page-cache locking consistency, bh mapping consistency or * extent's data copying must be performed by caller. * Locking: - * i_mutex is held for both inodes + * i_rwsem is held for both inodes * i_data_sem is locked for write for both inodes * Assumptions: * All pages from requested range are locked for both inodes @@ -6103,11 +6090,15 @@ int ext4_ext_clear_bb(struct inode *inode) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + 0, path[j].p_block, 1, 1); } ext4_ext_drop_refs(path); kfree(path); } ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + map.m_lblk, map.m_pblk, map.m_len, 1); } cur = cur + map.m_len; } diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0f32b445582a..3d72565ec6e8 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -65,21 +65,11 @@ * * Fast Commit Ineligibility * ------------------------- - * Not all operations are supported by fast commits today (e.g extended - * attributes). Fast commit ineligibility is marked by calling one of the - * two following functions: - * - * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall - * back to full commit. This is useful in case of transient errors. * - * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all - * the fast commits happening between ext4_fc_start_ineligible() and - * ext4_fc_stop_ineligible() and one fast commit after the call to - * ext4_fc_stop_ineligible() to fall back to full commits. It is important to - * make one more fast commit to fall back to full commit after stop call so - * that it guaranteed that the fast commit ineligible operation contained - * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is - * followed by at least 1 full commit. + * Not all operations are supported by fast commits today (e.g extended + * attributes). Fast commit ineligibility is marked by calling + * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back + * to full commit. * * Atomicity of commits * -------------------- @@ -166,15 +156,13 @@ * fast commit recovery even if that area is invalidated by later full * commits. * - * 1) Make fast commit atomic updates more fine grained. Today, a fast commit - * eligible update must be protected within ext4_fc_start_update() and - * ext4_fc_stop_update(). These routines are called at much higher - * routines. This can be made more fine grained by combining with - * ext4_journal_start(). + * 1) Fast commit's commit path locks the entire file system during fast + * commit. This has significant performance penalty. Instead of that, we + * should use ext4_fc_start/stop_update functions to start inode level + * updates from ext4_journal_start/stop. Once we do that we can drop file + * system locking during commit path. * - * 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - * - * 3) Handle more ineligible cases. + * 2) Handle more ineligible cases. */ #include <trace/events/ext4.h> @@ -211,6 +199,7 @@ void ext4_fc_init_inode(struct inode *inode) ext4_fc_reset_inode(inode); ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); INIT_LIST_HEAD(&ei->i_fc_list); + INIT_LIST_HEAD(&ei->i_fc_dilist); init_waitqueue_head(&ei->i_fc_wait); atomic_set(&ei->i_fc_updates, 0); } @@ -291,6 +280,8 @@ void ext4_fc_stop_update(struct inode *inode) void ext4_fc_del(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_fc_dentry_update *fc_dentry; if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) @@ -298,7 +289,7 @@ void ext4_fc_del(struct inode *inode) restart: spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); - if (list_empty(&ei->i_fc_list)) { + if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) { spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); return; } @@ -307,63 +298,64 @@ restart: ext4_fc_wait_committing_inode(inode); goto restart; } - list_del_init(&ei->i_fc_list); - spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); -} -/* - * Mark file system as fast commit ineligible. This means that next commit - * operation would result in a full jbd2 commit. - */ -void ext4_fc_mark_ineligible(struct super_block *sb, int reason) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); + if (!list_empty(&ei->i_fc_list)) + list_del_init(&ei->i_fc_list); - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) + /* + * Since this inode is getting removed, let's also remove all FC + * dentry create references, since it is not needed to log it anyways. + */ + if (list_empty(&ei->i_fc_dilist)) { + spin_unlock(&sbi->s_fc_lock); return; + } - ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - WARN_ON(reason >= EXT4_FC_REASON_MAX); - sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; -} + fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist); + WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT); + list_del_init(&fc_dentry->fcd_list); + list_del_init(&fc_dentry->fcd_dilist); -/* - * Start a fast commit ineligible update. Any commits that happen while - * such an operation is in progress fall back to full commits. - */ -void ext4_fc_start_ineligible(struct super_block *sb, int reason) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); + WARN_ON(!list_empty(&ei->i_fc_dilist)); + spin_unlock(&sbi->s_fc_lock); - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) - return; + if (fc_dentry->fcd_name.name && + fc_dentry->fcd_name.len > DNAME_INLINE_LEN) + kfree(fc_dentry->fcd_name.name); + kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); - WARN_ON(reason >= EXT4_FC_REASON_MAX); - sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; - atomic_inc(&sbi->s_fc_ineligible_updates); + return; } /* - * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here - * to ensure that after stopping the ineligible update, at least one full - * commit takes place. + * Mark file system as fast commit ineligible, and record latest + * ineligible transaction tid. This means until the recorded + * transaction, commit operation would result in a full jbd2 commit. */ -void ext4_fc_stop_ineligible(struct super_block *sb) +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) { + struct ext4_sb_info *sbi = EXT4_SB(sb); + tid_t tid; + if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) return; ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates); -} - -static inline int ext4_fc_is_ineligible(struct super_block *sb) -{ - return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) || - atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates)); + if (handle && !IS_ERR(handle)) + tid = handle->h_transaction->t_tid; + else { + read_lock(&sbi->s_journal->j_state_lock); + tid = sbi->s_journal->j_running_transaction ? + sbi->s_journal->j_running_transaction->t_tid : 0; + read_unlock(&sbi->s_journal->j_state_lock); + } + spin_lock(&sbi->s_fc_lock); + if (sbi->s_fc_ineligible_tid < tid) + sbi->s_fc_ineligible_tid = tid; + spin_unlock(&sbi->s_fc_lock); + WARN_ON(reason >= EXT4_FC_REASON_MAX); + sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; } /* @@ -387,13 +379,6 @@ static int ext4_fc_track_template( tid_t tid = 0; int ret; - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (sbi->s_mount_state & EXT4_FC_REPLAY)) - return -EOPNOTSUPP; - - if (ext4_fc_is_ineligible(inode->i_sb)) - return -EINVAL; - tid = handle->h_transaction->t_tid; mutex_lock(&ei->i_fc_lock); if (tid == ei->i_sync_tid) { @@ -411,7 +396,8 @@ static int ext4_fc_track_template( spin_lock(&sbi->s_fc_lock); if (list_empty(&EXT4_I(inode)->i_fc_list)) list_add_tail(&EXT4_I(inode)->i_fc_list, - (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? + (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? &sbi->s_fc_q[FC_Q_STAGING] : &sbi->s_fc_q[FC_Q_MAIN]); spin_unlock(&sbi->s_fc_lock); @@ -437,7 +423,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) mutex_unlock(&ei->i_fc_lock); node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if (!node) { - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -450,7 +436,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_NOMEM); + EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -462,13 +448,28 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) node->fcd_name.name = node->fcd_iname; } node->fcd_name.len = dentry->d_name.len; - + INIT_LIST_HEAD(&node->fcd_dilist); spin_lock(&sbi->s_fc_lock); - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) + if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_STAGING]); else list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); + + /* + * This helps us keep a track of all fc_dentry updates which is part of + * this ext4 inode. So in case the inode is getting unlinked, before + * even we get a chance to fsync, we could remove all fc_dentry + * references while evicting the inode in ext4_fc_del(). + * Also with this, we don't need to loop over all the inodes in + * sbi->s_fc_q to get the corresponding inode in + * ext4_fc_commit_dentry_updates(). + */ + if (dentry_update->op == EXT4_FC_TAG_CREAT) { + WARN_ON(!list_empty(&ei->i_fc_dilist)); + list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist); + } spin_unlock(&sbi->s_fc_lock); mutex_lock(&ei->i_fc_lock); @@ -486,12 +487,22 @@ void __ext4_fc_track_unlink(handle_t *handle, ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); - trace_ext4_fc_track_unlink(inode, dentry, ret); + trace_ext4_fc_track_unlink(handle, inode, dentry, ret); } void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) { - __ext4_fc_track_unlink(handle, d_inode(dentry), dentry); + struct inode *inode = d_inode(dentry); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + + __ext4_fc_track_unlink(handle, inode, dentry); } void __ext4_fc_track_link(handle_t *handle, @@ -505,12 +516,22 @@ void __ext4_fc_track_link(handle_t *handle, ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); - trace_ext4_fc_track_link(inode, dentry, ret); + trace_ext4_fc_track_link(handle, inode, dentry, ret); } void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) { - __ext4_fc_track_link(handle, d_inode(dentry), dentry); + struct inode *inode = d_inode(dentry); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + + __ext4_fc_track_link(handle, inode, dentry); } void __ext4_fc_track_create(handle_t *handle, struct inode *inode, @@ -524,12 +545,22 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); - trace_ext4_fc_track_create(inode, dentry, ret); + trace_ext4_fc_track_create(handle, inode, dentry, ret); } void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) { - __ext4_fc_track_create(handle, d_inode(dentry), dentry); + struct inode *inode = d_inode(dentry); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + + __ext4_fc_track_create(handle, inode, dentry); } /* __track_fn for inode tracking */ @@ -545,6 +576,7 @@ static int __track_inode(struct inode *inode, void *arg, bool update) void ext4_fc_track_inode(handle_t *handle, struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int ret; if (S_ISDIR(inode->i_mode)) @@ -552,12 +584,19 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) if (ext4_should_journal_data(inode)) { ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_INODE_JOURNAL_DATA); + EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); return; } + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); - trace_ext4_fc_track_inode(inode, ret); + trace_ext4_fc_track_inode(handle, inode, ret); } struct __track_range_args { @@ -595,18 +634,26 @@ static int __track_range(struct inode *inode, void *arg, bool update) void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, ext4_lblk_t end) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct __track_range_args args; int ret; if (S_ISDIR(inode->i_mode)) return; + if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return; + + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return; + args.start = start; args.end = end; ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); - trace_ext4_fc_track_range(inode, start, end, ret); + trace_ext4_fc_track_range(handle, inode, start, end, ret); } static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) @@ -796,7 +843,6 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); dst += sizeof(fcd); ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); - dst += dlen; return true; } @@ -930,7 +976,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) int ret = 0; spin_lock(&sbi->s_fc_lock); - ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING); list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); while (atomic_read(&ei->i_fc_updates)) { @@ -990,7 +1035,7 @@ __releases(&sbi->s_fc_lock) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; struct inode *inode; - struct ext4_inode_info *ei, *ei_n; + struct ext4_inode_info *ei; int ret; if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) @@ -1006,21 +1051,16 @@ __releases(&sbi->s_fc_lock) spin_lock(&sbi->s_fc_lock); continue; } - - inode = NULL; - list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN], - i_fc_list) { - if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) { - inode = &ei->vfs_inode; - break; - } - } /* - * If we don't find inode in our list, then it was deleted, - * in which case, we don't need to record it's create tag. + * With fcd_dilist we need not loop in sbi->s_fc_q to get the + * corresponding inode pointer */ - if (!inode) - continue; + WARN_ON(list_empty(&fc_dentry->fcd_dilist)); + ei = list_first_entry(&fc_dentry->fcd_dilist, + struct ext4_inode_info, i_fc_dilist); + inode = &ei->vfs_inode; + WARN_ON(inode->i_ino != fc_dentry->fcd_ino); + spin_unlock(&sbi->s_fc_lock); /* @@ -1123,6 +1163,33 @@ out: return ret; } +static void ext4_fc_update_stats(struct super_block *sb, int status, + u64 commit_time, int nblks, tid_t commit_tid) +{ + struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; + + jbd_debug(1, "Fast commit ended with status = %d for tid %u", + status, commit_tid); + if (status == EXT4_FC_STATUS_OK) { + stats->fc_num_commits++; + stats->fc_numblks += nblks; + if (likely(stats->s_fc_avg_commit_time)) + stats->s_fc_avg_commit_time = + (commit_time + + stats->s_fc_avg_commit_time * 3) / 4; + else + stats->s_fc_avg_commit_time = commit_time; + } else if (status == EXT4_FC_STATUS_FAILED || + status == EXT4_FC_STATUS_INELIGIBLE) { + if (status == EXT4_FC_STATUS_FAILED) + stats->fc_failed_commits++; + stats->fc_ineligible_commits++; + } else { + stats->fc_skipped_commits++; + } + trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid); +} + /* * The main commit entry point. Performs a fast commit for transaction * commit_tid if needed. If it's not possible to perform a fast commit @@ -1135,18 +1202,15 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) struct ext4_sb_info *sbi = EXT4_SB(sb); int nblks = 0, ret, bsize = journal->j_blocksize; int subtid = atomic_read(&sbi->s_fc_subtid); - int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0; + int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; ktime_t start_time, commit_time; - trace_ext4_fc_commit_start(sb); + if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) + return jbd2_complete_transaction(journal, commit_tid); - start_time = ktime_get(); + trace_ext4_fc_commit_start(sb, commit_tid); - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || - (ext4_fc_is_ineligible(sb))) { - reason = EXT4_FC_REASON_INELIGIBLE; - goto out; - } + start_time = ktime_get(); restart_fc: ret = jbd2_fc_begin_commit(journal, commit_tid); @@ -1155,74 +1219,61 @@ restart_fc: if (atomic_read(&sbi->s_fc_subtid) <= subtid && commit_tid > journal->j_commit_sequence) goto restart_fc; - reason = EXT4_FC_REASON_ALREADY_COMMITTED; - goto out; + ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0, + commit_tid); + return 0; } else if (ret) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_FC_START_FAILED; - goto out; + /* + * Commit couldn't start. Just update stats and perform a + * full commit. + */ + ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0, + commit_tid); + return jbd2_complete_transaction(journal, commit_tid); + } + + /* + * After establishing journal barrier via jbd2_fc_begin_commit(), check + * if we are fast commit ineligible. + */ + if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { + status = EXT4_FC_STATUS_INELIGIBLE; + goto fallback; } fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; ret = ext4_fc_perform_commit(journal); if (ret < 0) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_FC_FAILED; - goto out; + status = EXT4_FC_STATUS_FAILED; + goto fallback; } nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; ret = jbd2_fc_wait_bufs(journal, nblks); if (ret < 0) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_FC_FAILED; - goto out; + status = EXT4_FC_STATUS_FAILED; + goto fallback; } atomic_inc(&sbi->s_fc_subtid); - jbd2_fc_end_commit(journal); -out: - /* Has any ineligible update happened since we started? */ - if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_INELIGIBLE; - } - - spin_lock(&sbi->s_fc_lock); - if (reason != EXT4_FC_REASON_OK && - reason != EXT4_FC_REASON_ALREADY_COMMITTED) { - sbi->s_fc_stats.fc_ineligible_commits++; - } else { - sbi->s_fc_stats.fc_num_commits++; - sbi->s_fc_stats.fc_numblks += nblks; - } - spin_unlock(&sbi->s_fc_lock); - nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0; - trace_ext4_fc_commit_stop(sb, nblks, reason); - commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + ret = jbd2_fc_end_commit(journal); /* - * weight the commit time higher than the average time so we don't - * react too strongly to vast changes in the commit time + * weight the commit time higher than the average time so we + * don't react too strongly to vast changes in the commit time */ - if (likely(sbi->s_fc_avg_commit_time)) - sbi->s_fc_avg_commit_time = (commit_time + - sbi->s_fc_avg_commit_time * 3) / 4; - else - sbi->s_fc_avg_commit_time = commit_time; - jbd_debug(1, - "Fast commit ended with blks = %d, reason = %d, subtid - %d", - nblks, reason, subtid); - if (reason == EXT4_FC_REASON_FC_FAILED) - return jbd2_fc_end_commit_fallback(journal); - if (reason == EXT4_FC_REASON_FC_START_FAILED || - reason == EXT4_FC_REASON_INELIGIBLE) - return jbd2_complete_transaction(journal, commit_tid); - return 0; + commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid); + return ret; + +fallback: + ret = jbd2_fc_end_commit_fallback(journal); + ext4_fc_update_stats(sb, status, 0, 0, commit_tid); + return ret; } /* * Fast commit cleanup routine. This is called after every fast commit and * full commit. full is true if we are called after a full commit. */ -static void ext4_fc_cleanup(journal_t *journal, int full) +static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) { struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1232,6 +1283,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) if (full && sbi->s_fc_bh) sbi->s_fc_bh = NULL; + trace_ext4_fc_cleanup(journal, full, tid); jbd2_fc_release_bufs(journal); spin_lock(&sbi->s_fc_lock); @@ -1240,7 +1292,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_del_init(&iter->i_fc_list); ext4_clear_inode_state(&iter->vfs_inode, EXT4_STATE_FC_COMMITTING); - ext4_fc_reset_inode(&iter->vfs_inode); + if (iter->i_sync_tid <= tid) + ext4_fc_reset_inode(&iter->vfs_inode); /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ smp_mb(); #if (BITS_PER_LONG < 64) @@ -1255,6 +1308,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) struct ext4_fc_dentry_update, fcd_list); list_del_init(&fc_dentry->fcd_list); + list_del_init(&fc_dentry->fcd_dilist); spin_unlock(&sbi->s_fc_lock); if (fc_dentry->fcd_name.name && @@ -1269,8 +1323,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], &sbi->s_fc_q[FC_Q_MAIN]); - ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); - ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + if (tid >= sbi->s_fc_ineligible_tid) { + sbi->s_fc_ineligible_tid = 0; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + } if (full) sbi->s_fc_bytes = 0; @@ -1435,14 +1491,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) if (state->fc_modified_inodes[i] == ino) return 0; if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { - state->fc_modified_inodes_size += - EXT4_FC_REPLAY_REALLOC_INCREMENT; state->fc_modified_inodes = krealloc( - state->fc_modified_inodes, sizeof(int) * - state->fc_modified_inodes_size, - GFP_KERNEL); + state->fc_modified_inodes, + sizeof(int) * (state->fc_modified_inodes_size + + EXT4_FC_REPLAY_REALLOC_INCREMENT), + GFP_KERNEL); if (!state->fc_modified_inodes) return -ENOMEM; + state->fc_modified_inodes_size += + EXT4_FC_REPLAY_REALLOC_INCREMENT; } state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; return 0; @@ -1474,7 +1531,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, } inode = NULL; - ext4_fc_record_modified_inode(sb, ino); + ret = ext4_fc_record_modified_inode(sb, ino); + if (ret) + goto out; raw_fc_inode = (struct ext4_inode *) (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); @@ -1606,16 +1665,23 @@ out: } /* - * Record physical disk regions which are in use as per fast commit area. Our - * simple replay phase allocator excludes these regions from allocation. + * Record physical disk regions which are in use as per fast commit area, + * and used by inodes during replay phase. Our simple replay phase + * allocator excludes these regions from allocation. */ -static int ext4_fc_record_regions(struct super_block *sb, int ino, - ext4_lblk_t lblk, ext4_fsblk_t pblk, int len) +int ext4_fc_record_regions(struct super_block *sb, int ino, + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) { struct ext4_fc_replay_state *state; struct ext4_fc_alloc_region *region; state = &EXT4_SB(sb)->s_fc_replay_state; + /* + * during replay phase, the fc_regions_valid may not same as + * fc_regions_used, update it when do new additions. + */ + if (replay && state->fc_regions_used != state->fc_regions_valid) + state->fc_regions_used = state->fc_regions_valid; if (state->fc_regions_used == state->fc_regions_size) { state->fc_regions_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; @@ -1633,6 +1699,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino, region->pblk = pblk; region->len = len; + if (replay) + state->fc_regions_valid++; + return 0; } @@ -1664,6 +1733,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; start = le32_to_cpu(ex->ee_block); start_pblk = ext4_ext_pblock(ex); @@ -1681,18 +1752,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb, map.m_pblk = 0; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret == 0) { /* Range is not mapped */ path = ext4_find_extent(inode, cur, NULL, 0); - if (IS_ERR(path)) { - iput(inode); - return 0; - } + if (IS_ERR(path)) + goto out; memset(&newex, 0, sizeof(newex)); newex.ee_block = cpu_to_le32(cur); ext4_ext_store_pblock( @@ -1706,10 +1773,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, up_write((&EXT4_I(inode)->i_data_sem)); ext4_ext_drop_refs(path); kfree(path); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; goto next; } @@ -1722,10 +1787,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), start_pblk + cur - start); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * Mark the old blocks as free since they aren't used * anymore. We maintain an array of all the modified @@ -1745,10 +1808,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ext4_ext_is_unwritten(ex), map.m_pblk); ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), map.m_pblk); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * We may have split the extent tree while toggling the state. * Try to shrink the extent tree now. @@ -1760,6 +1821,7 @@ next: } ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); +out: iput(inode); return 0; } @@ -1789,6 +1851,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", inode->i_ino, le32_to_cpu(lrange.fc_lblk), @@ -1798,10 +1862,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, map.m_len = remaining; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret > 0) { remaining -= ret; cur += ret; @@ -1812,16 +1874,18 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } } - ret = ext4_punch_hole(inode, - le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits, - le32_to_cpu(lrange.fc_len) << sb->s_blocksize_bits); + down_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), + le32_to_cpu(lrange.fc_lblk) + + le32_to_cpu(lrange.fc_len) - 1); + up_write(&EXT4_I(inode)->i_data_sem); if (ret) - jbd_debug(1, "ext4_punch_hole returned %d", ret); + goto out; ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); ext4_mark_inode_dirty(NULL, inode); +out: iput(inode); - return 0; } @@ -1892,8 +1956,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) if (state->fc_regions[i].ino == 0 || state->fc_regions[i].len == 0) continue; - if (blk >= state->fc_regions[i].pblk && - blk < state->fc_regions[i].pblk + state->fc_regions[i].len) + if (in_range(blk, state->fc_regions[i].pblk, + state->fc_regions[i].len)) return true; } return false; @@ -1977,7 +2041,7 @@ static int ext4_fc_replay_scan(journal_t *journal, ret = ext4_fc_record_regions(sb, le32_to_cpu(ext.fc_ino), le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), - ext4_ext_get_actual_len(ex)); + ext4_ext_get_actual_len(ex), 0); if (ret < 0) break; ret = JBD2_FC_REPLAY_CONTINUE; @@ -2173,7 +2237,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v) "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", stats->fc_num_commits, stats->fc_ineligible_commits, stats->fc_numblks, - div_u64(sbi->s_fc_avg_commit_time, 1000)); + div_u64(stats->s_fc_avg_commit_time, 1000)); seq_puts(seq, "Ineligible reasons:\n"); for (i = 0; i < EXT4_FC_REASON_MAX; i++) seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], @@ -2192,3 +2256,8 @@ int __init ext4_fc_init_dentry_cache(void) return 0; } + +void ext4_fc_destroy_dentry_cache(void) +{ + kmem_cache_destroy(ext4_fc_dentry_cachep); +} diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 937c381b4c85..1db12847a83b 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -55,13 +55,13 @@ struct ext4_fc_del_range { struct ext4_fc_dentry_info { __le32 fc_parent_ino; __le32 fc_ino; - __u8 fc_dname[0]; + __u8 fc_dname[]; }; /* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */ struct ext4_fc_inode { __le32 fc_ino; - __u8 fc_raw_inode[0]; + __u8 fc_raw_inode[]; }; /* Value structure for tag EXT4_FC_TAG_TAIL. */ @@ -71,21 +71,19 @@ struct ext4_fc_tail { }; /* - * Fast commit reason codes + * Fast commit status codes + */ +enum { + EXT4_FC_STATUS_OK = 0, + EXT4_FC_STATUS_INELIGIBLE, + EXT4_FC_STATUS_SKIPPED, + EXT4_FC_STATUS_FAILED, +}; + +/* + * Fast commit ineligiblity reasons: */ enum { - /* - * Commit status codes: - */ - EXT4_FC_REASON_OK = 0, - EXT4_FC_REASON_INELIGIBLE, - EXT4_FC_REASON_ALREADY_COMMITTED, - EXT4_FC_REASON_FC_START_FAILED, - EXT4_FC_REASON_FC_FAILED, - - /* - * Fast commit ineligiblity reasons: - */ EXT4_FC_REASON_XATTR = 0, EXT4_FC_REASON_CROSS_RENAME, EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, @@ -95,7 +93,6 @@ enum { EXT4_FC_REASON_RENAME_DIR, EXT4_FC_REASON_FALLOC_RANGE, EXT4_FC_REASON_INODE_JOURNAL_DATA, - EXT4_FC_COMMIT_FAILED, EXT4_FC_REASON_MAX }; @@ -111,13 +108,17 @@ struct ext4_fc_dentry_update { struct qstr fcd_name; /* Dirent name */ unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ struct list_head fcd_list; + struct list_head fcd_dilist; }; struct ext4_fc_stats { unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX]; unsigned long fc_num_commits; unsigned long fc_ineligible_commits; + unsigned long fc_failed_commits; + unsigned long fc_skipped_commits; unsigned long fc_numblks; + u64 s_fc_avg_commit_time; }; #define EXT4_FC_REPLAY_REALLOC_INCREMENT 4 diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4c5f41052351..8bd66cdc41be 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -36,9 +36,11 @@ #include "acl.h" #include "truncate.h" -static bool ext4_dio_supported(struct inode *inode) +static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter) { - if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode)) + struct inode *inode = file_inode(iocb->ki_filp); + + if (!fscrypt_dio_supported(iocb, iter)) return false; if (fsverity_active(inode)) return false; @@ -61,7 +63,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) inode_lock_shared(inode); } - if (!ext4_dio_supported(inode)) { + if (!ext4_dio_supported(iocb, to)) { inode_unlock_shared(inode); /* * Fallback to buffered I/O if the operation being performed on @@ -259,7 +261,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, if (iocb->ki_flags & IOCB_NOWAIT) return -EOPNOTSUPP; - ext4_fc_start_update(inode); inode_lock(inode); ret = ext4_write_checks(iocb, from); if (ret <= 0) @@ -271,7 +272,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, out: inode_unlock(inode); - ext4_fc_stop_update(inode); if (likely(ret > 0)) { iocb->ki_pos += ret; ret = generic_write_sync(iocb, ret); @@ -511,7 +511,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) } /* Fallback to buffered I/O if the inode does not support direct I/O. */ - if (!ext4_dio_supported(inode)) { + if (!ext4_dio_supported(iocb, from)) { if (ilock_shared) inode_unlock_shared(inode); else @@ -552,9 +552,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - ext4_fc_start_update(inode); ret = ext4_orphan_add(handle, inode); - ext4_fc_stop_update(inode); if (ret) { ext4_journal_stop(handle); goto out; diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index f34f4176c1e7..147b5241dd94 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -290,7 +290,7 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len, int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 89efa78ed4b2..07a8c75b65ed 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by - * i_mutex. So we can safely drop the i_data_sem here. + * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); ext4_discard_preallocations(inode, 0); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 39a1ab129fdc..9c076262770d 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -7,7 +7,7 @@ #include <linux/iomap.h> #include <linux/fiemap.h> #include <linux/iversion.h> -#include <linux/backing-dev.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4.h" @@ -911,7 +911,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, struct page **pagep, void **fsdata) { - int ret, inline_size; + int ret; handle_t *handle; struct page *page; struct ext4_iloc iloc; @@ -928,14 +928,9 @@ retry_journal: goto out; } - inline_size = ext4_get_max_inline_size(inode); - - ret = -ENOSPC; - if (inline_size >= pos + len) { - ret = ext4_prepare_inline_data(handle, inode, pos + len); - if (ret && ret != -ENOSPC) - goto out_journal; - } + ret = ext4_prepare_inline_data(handle, inode, pos + len); + if (ret && ret != -ENOSPC) + goto out_journal; /* * We cannot recurse into the filesystem as the transaction @@ -1133,7 +1128,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { - ext4_create_inline_data(handle, inode, inline_size); + int ret; + + ret = ext4_create_inline_data(handle, inode, inline_size); + if (ret) { + ext4_msg(inode->i_sb, KERN_EMERG, + "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", + inode->i_ino, ret); + return; + } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } @@ -1780,19 +1783,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; - bool ret = true; + bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE_ERR(dir, -err, "error %d getting inode %lu block", err, dir->i_ino); - return true; + return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; + ret = true; goto out; } @@ -1801,7 +1805,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); - ret = true; goto out; } @@ -1820,16 +1823,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); - ret = true; goto out; } if (le32_to_cpu(de->inode)) { - ret = false; goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } + ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); @@ -1929,8 +1931,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) retry: err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); if (err == -ENOMEM) { - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_ATOMIC); goto retry; } if (err) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bfd3545f1e5d..1ce13f69fbec 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -41,6 +41,7 @@ #include <linux/bitops.h> #include <linux/iomap.h> #include <linux/iversion.h> +#include <linux/dax.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -136,8 +137,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, new_size); } -static void ext4_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int pextents); @@ -185,7 +184,7 @@ void ext4_evict_inode(struct inode *inode) * journal. So although mm thinks everything is clean and * ready for reaping the inode might still have some pages to * write in the running transaction or waiting to be - * checkpointed. Thus calling jbd2_journal_invalidatepage() + * checkpointed. Thus calling jbd2_journal_invalidate_folio() * (via truncate_inode_pages()) to discard these buffers can * cause data loss. Also even if we did not discard these * buffers, we would have no way to find them after the inode @@ -337,7 +336,7 @@ stop_handle: return; no_delete: if (!list_empty(&EXT4_I(inode)->i_fc_list)) - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } @@ -741,10 +740,11 @@ out_sem: if (ret) return ret; } - ext4_fc_track_range(handle, inode, map->m_lblk, - map->m_lblk + map->m_len - 1); } - + if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN || + map->m_flags & EXT4_MAP_MAPPED)) + ext4_fc_track_range(handle, inode, map->m_lblk, + map->m_lblk + map->m_len - 1); if (retval < 0) ext_debug(inode, "failed with err %d\n", retval); return retval; @@ -1222,7 +1222,7 @@ retry_journal: /* * __block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. + * i_size_read because we hold i_rwsem. * * Add inode to orphan list in case we crash before * truncate finishes @@ -1569,16 +1569,18 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, break; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + struct folio *folio = page_folio(page); - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); + BUG_ON(!folio_test_locked(folio)); + BUG_ON(folio_test_writeback(folio)); if (invalidate) { - if (page_mapped(page)) - clear_page_dirty_for_io(page); - block_invalidatepage(page, 0, PAGE_SIZE); - ClearPageUptodate(page); + if (folio_mapped(folio)) + folio_clear_dirty_for_io(folio); + block_invalidate_folio(folio, 0, + folio_size(folio)); + folio_clear_uptodate(folio); } - unlock_page(page); + folio_unlock(folio); } pagevec_release(&pvec); } @@ -1844,30 +1846,16 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return 0; } -static int bget_one(handle_t *handle, struct inode *inode, - struct buffer_head *bh) -{ - get_bh(bh); - return 0; -} - -static int bput_one(handle_t *handle, struct inode *inode, - struct buffer_head *bh) -{ - put_bh(bh); - return 0; -} - static int __ext4_journalled_writepage(struct page *page, unsigned int len) { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct buffer_head *page_bufs = NULL; handle_t *handle = NULL; int ret = 0, err = 0; int inline_data = ext4_has_inline_data(inode); struct buffer_head *inode_bh = NULL; + loff_t size; ClearPageChecked(page); @@ -1877,14 +1865,6 @@ static int __ext4_journalled_writepage(struct page *page, inode_bh = ext4_journalled_write_inline_data(inode, len, page); if (inode_bh == NULL) goto out; - } else { - page_bufs = page_buffers(page); - if (!page_bufs) { - BUG(); - goto out; - } - ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, - NULL, bget_one); } /* * We need to release the page lock before we start the @@ -1905,7 +1885,8 @@ static int __ext4_journalled_writepage(struct page *page, lock_page(page); put_page(page); - if (page->mapping != mapping) { + size = i_size_read(inode); + if (page->mapping != mapping || page_offset(page) > size) { /* The page got truncated from under us */ ext4_journal_stop(handle); ret = 0; @@ -1915,6 +1896,13 @@ static int __ext4_journalled_writepage(struct page *page, if (inline_data) { ret = ext4_mark_inode_dirty(handle, inode); } else { + struct buffer_head *page_bufs = page_buffers(page); + + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; + else + len = PAGE_SIZE; + ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, NULL, do_journal_get_write_access); @@ -1935,9 +1923,6 @@ static int __ext4_journalled_writepage(struct page *page, out: unlock_page(page); out_no_pagelock: - if (!inline_data && page_bufs) - ext4_walk_page_buffers(NULL, inode, page_bufs, 0, len, - NULL, bput_one); brelse(inode_bh); return ret; } @@ -1986,6 +1971,7 @@ out_no_pagelock: static int ext4_writepage(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); int ret = 0; loff_t size; unsigned int len; @@ -1995,8 +1981,8 @@ static int ext4_writepage(struct page *page, bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); - unlock_page(page); + folio_invalidate(folio, 0, folio_size(folio)); + folio_unlock(folio); return -EIO; } @@ -2008,6 +1994,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2257,7 +2252,6 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, mpd->map.m_len = 0; mpd->map.m_flags = 0; io_end_vec->size += io_end_size; - io_end_size = 0; err = mpage_process_page_bufs(mpd, head, bh, lblk); if (err > 0) @@ -2282,7 +2276,6 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, } while (lblk++, (bh = bh->b_this_page) != head); io_end_vec->size += io_end_size; - io_end_size = 0; *map_bh = false; out: *m_lblk = lblk; @@ -2611,6 +2604,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; @@ -3199,40 +3208,39 @@ static void ext4_readahead(struct readahead_control *rac) ext4_mpage_readpages(inode, rac, NULL); } -static void ext4_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) +static void ext4_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - trace_ext4_invalidatepage(page, offset, length); + trace_ext4_invalidate_folio(folio, offset, length); /* No journalling happens on data buffers when this function is used */ - WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); + WARN_ON(folio_buffers(folio) && buffer_jbd(folio_buffers(folio))); - block_invalidatepage(page, offset, length); + block_invalidate_folio(folio, offset, length); } -static int __ext4_journalled_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +static int __ext4_journalled_invalidate_folio(struct folio *folio, + size_t offset, size_t length) { - journal_t *journal = EXT4_JOURNAL(page->mapping->host); + journal_t *journal = EXT4_JOURNAL(folio->mapping->host); - trace_ext4_journalled_invalidatepage(page, offset, length); + trace_ext4_journalled_invalidate_folio(folio, offset, length); /* * If it's a full truncate we just forget about the pending dirtying */ - if (offset == 0 && length == PAGE_SIZE) - ClearPageChecked(page); + if (offset == 0 && length == folio_size(folio)) + folio_clear_checked(folio); - return jbd2_journal_invalidatepage(journal, page, offset, length); + return jbd2_journal_invalidate_folio(journal, folio, offset, length); } /* Wrapper for aops... */ -static void ext4_journalled_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +static void ext4_journalled_invalidate_folio(struct folio *folio, + size_t offset, + size_t length) { - WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); + WARN_ON(__ext4_journalled_invalidate_folio(folio, offset, length) < 0); } static int ext4_releasepage(struct page *page, gfp_t wait) @@ -3271,7 +3279,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, struct ext4_map_blocks *map, loff_t offset, - loff_t length) + loff_t length, unsigned int flags) { u8 blkbits = inode->i_blkbits; @@ -3288,8 +3296,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, if (map->m_flags & EXT4_MAP_NEW) iomap->flags |= IOMAP_F_NEW; - iomap->bdev = inode->i_sb->s_bdev; - iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; + if (flags & IOMAP_DAX) + iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; + else + iomap->bdev = inode->i_sb->s_bdev; iomap->offset = (u64) map->m_lblk << blkbits; iomap->length = (u64) map->m_len << blkbits; @@ -3309,9 +3319,13 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, if (map->m_flags & EXT4_MAP_UNWRITTEN) { iomap->type = IOMAP_UNWRITTEN; iomap->addr = (u64) map->m_pblk << blkbits; + if (flags & IOMAP_DAX) + iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; } else if (map->m_flags & EXT4_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; iomap->addr = (u64) map->m_pblk << blkbits; + if (flags & IOMAP_DAX) + iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; @@ -3348,8 +3362,8 @@ retry: * DAX and direct I/O are the only two operations that are currently * supported with IOMAP_WRITE. */ - WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT)); - if (IS_DAX(inode)) + WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT))); + if (flags & IOMAP_DAX) m_flags = EXT4_GET_BLOCKS_CREATE_ZERO; /* * We use i_size instead of i_disksize here because delalloc writeback @@ -3420,7 +3434,14 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret < 0) return ret; out: - ext4_set_iomap(inode, iomap, &map, offset, length); + /* + * When inline encryption is enabled, sometimes I/O to an encrypted file + * has to be broken up to guarantee DUN contiguity. Handle this by + * limiting the length of the mapping returned. + */ + map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); + + ext4_set_iomap(inode, iomap, &map, offset, length, flags); return 0; } @@ -3540,7 +3561,7 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, delalloc = ext4_iomap_is_delalloc(inode, &map); set_iomap: - ext4_set_iomap(inode, iomap, &map, offset, length); + ext4_set_iomap(inode, iomap, &map, offset, length, flags); if (delalloc && iomap->type == IOMAP_HOLE) iomap->type = IOMAP_DELALLOC; @@ -3552,29 +3573,32 @@ const struct iomap_ops ext4_iomap_report_ops = { }; /* - * Pages can be marked dirty completely asynchronously from ext4's journalling - * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do - * much here because ->set_page_dirty is called under VFS locks. The page is - * not necessarily locked. + * Whenever the folio is being dirtied, corresponding buffers should already + * be attached to the transaction (we take care of this in ext4_page_mkwrite() + * and ext4_write_begin()). However we cannot move buffers to dirty transaction + * lists here because ->dirty_folio is called under VFS locks and the folio + * is not necessarily locked. * - * We cannot just dirty the page and leave attached buffers clean, because the + * We cannot just dirty the folio and leave attached buffers clean, because the * buffers' dirty state is "definitive". We cannot just set the buffers dirty * or jbddirty because all the journalling code will explode. * - * So what we do is to mark the page "pending dirty" and next time writepage + * So what we do is to mark the folio "pending dirty" and next time writepage * is called, propagate that into the buffers appropriately. */ -static int ext4_journalled_set_page_dirty(struct page *page) +static bool ext4_journalled_dirty_folio(struct address_space *mapping, + struct folio *folio) { - SetPageChecked(page); - return __set_page_dirty_nobuffers(page); + WARN_ON_ONCE(!page_has_buffers(&folio->page)); + folio_set_checked(folio); + return filemap_dirty_folio(mapping, folio); } -static int ext4_set_page_dirty(struct page *page) +static bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio) { - WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page)); - WARN_ON_ONCE(!page_has_buffers(page)); - return __set_page_dirty_buffers(page); + WARN_ON_ONCE(!folio_test_locked(folio) && !folio_test_dirty(folio)); + WARN_ON_ONCE(!folio_buffers(folio)); + return block_dirty_folio(mapping, folio); } static int ext4_iomap_swap_activate(struct swap_info_struct *sis, @@ -3591,9 +3615,9 @@ static const struct address_space_operations ext4_aops = { .writepages = ext4_writepages, .write_begin = ext4_write_begin, .write_end = ext4_write_end, - .set_page_dirty = ext4_set_page_dirty, + .dirty_folio = ext4_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, + .invalidate_folio = ext4_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, @@ -3609,9 +3633,9 @@ static const struct address_space_operations ext4_journalled_aops = { .writepages = ext4_writepages, .write_begin = ext4_write_begin, .write_end = ext4_journalled_write_end, - .set_page_dirty = ext4_journalled_set_page_dirty, + .dirty_folio = ext4_journalled_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_journalled_invalidatepage, + .invalidate_folio = ext4_journalled_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .is_partially_uptodate = block_is_partially_uptodate, @@ -3626,9 +3650,9 @@ static const struct address_space_operations ext4_da_aops = { .writepages = ext4_writepages, .write_begin = ext4_da_write_begin, .write_end = ext4_da_write_end, - .set_page_dirty = ext4_set_page_dirty, + .dirty_folio = ext4_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, + .invalidate_folio = ext4_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, @@ -3640,9 +3664,8 @@ static const struct address_space_operations ext4_da_aops = { static const struct address_space_operations ext4_dax_aops = { .writepages = ext4_dax_writepages, .direct_IO = noop_direct_IO, - .set_page_dirty = __set_page_dirty_no_writeback, + .dirty_folio = noop_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = noop_invalidatepage, .swap_activate = ext4_iomap_swap_activate, }; @@ -3780,8 +3803,8 @@ static int ext4_block_zero_page_range(handle_t *handle, length = max; if (IS_DAX(inode)) { - return iomap_zero_range(inode, from, length, NULL, - &ext4_iomap_ops); + return dax_zero_range(inode, from, length, NULL, + &ext4_iomap_ops); } return __ext4_block_zero_page_range(handle, mapping, from, length); } @@ -3990,7 +4013,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); /* @@ -4140,7 +4163,7 @@ int ext4_truncate(struct inode *inode) /* * There is a possibility that we're either freeing the inode * or it's a completely new inode. In those cases we might not - * have i_mutex locked because it's not necessary. + * have i_rwsem locked because it's not necessary. */ if (!(inode->i_state & (I_NEW|I_FREEING))) WARN_ON(!inode_is_locked(inode)); @@ -4523,7 +4546,7 @@ has_buffer: static int __ext4_get_inode_loc_noinmem(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc, @@ -4538,7 +4561,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc, @@ -5215,13 +5238,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) } /* - * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate - * buffers that are attached to a page stradding i_size and are undergoing + * In data=journal mode ext4_journalled_invalidate_folio() may fail to invalidate + * buffers that are attached to a folio straddling i_size and are undergoing * commit. In that case we have to wait for commit to finish and try again. */ static void ext4_wait_for_tail_page_commit(struct inode *inode) { - struct page *page; unsigned offset; journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = 0; @@ -5229,25 +5251,25 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* - * If the page is fully truncated, we don't need to wait for any commit - * (and we even should not as __ext4_journalled_invalidatepage() may - * strip all buffers from the page but keep the page dirty which can then - * confuse e.g. concurrent ext4_writepage() seeing dirty page without + * If the folio is fully truncated, we don't need to wait for any commit + * (and we even should not as __ext4_journalled_invalidate_folio() may + * strip all buffers from the folio but keep the folio dirty which can then + * confuse e.g. concurrent ext4_writepage() seeing dirty folio without * buffers). Also we don't need to wait for any commit if all buffers in - * the page remain valid. This is most beneficial for the common case of + * the folio remain valid. This is most beneficial for the common case of * blocksize == PAGESIZE. */ if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) return; while (1) { - page = find_lock_page(inode->i_mapping, + struct folio *folio = filemap_lock_folio(inode->i_mapping, inode->i_size >> PAGE_SHIFT); - if (!page) + if (!folio) return; - ret = __ext4_journalled_invalidatepage(page, offset, - PAGE_SIZE - offset); - unlock_page(page); - put_page(page); + ret = __ext4_journalled_invalidate_folio(folio, offset, + folio_size(folio) - offset); + folio_unlock(folio); + folio_put(folio); if (ret != -EBUSY) return; commit_tid = 0; @@ -5282,7 +5304,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) * transaction are already on disk (truncate waits for pages under * writeback). * - * Called with inode->i_mutex down. + * Called with inode->i_rwsem down. */ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *attr) @@ -5320,7 +5342,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) return error; } - ext4_fc_start_update(inode); + if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { handle_t *handle; @@ -5344,7 +5366,6 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) { ext4_journal_stop(handle); - ext4_fc_stop_update(inode); return error; } /* Update corresponding info in inode so that everything is in @@ -5356,7 +5377,6 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); if (unlikely(error)) { - ext4_fc_stop_update(inode); return error; } } @@ -5370,12 +5390,10 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if (attr->ia_size > sbi->s_bitmap_maxbytes) { - ext4_fc_stop_update(inode); return -EFBIG; } } if (!S_ISREG(inode->i_mode)) { - ext4_fc_stop_update(inode); return -EINVAL; } @@ -5427,8 +5445,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ext4_fc_track_range(handle, inode, (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> inode->i_sb->s_blocksize_bits, - (oldsize > 0 ? oldsize - 1 : 0) >> - inode->i_sb->s_blocksize_bits); + EXT_MAX_BLOCKS - 1); else ext4_fc_track_range( handle, inode, @@ -5499,7 +5516,6 @@ err_out: ext4_std_error(inode->i_sb, error); if (!error) error = rc; - ext4_fc_stop_update(inode); return error; } @@ -6000,7 +6016,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return PTR_ERR(handle); ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); + EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle); err = ext4_mark_inode_dirty(handle, inode); ext4_handle_sync(handle); ext4_journal_stop(handle); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 606dee9e08a3..992229ca2d83 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -27,7 +27,249 @@ #include "fsmap.h" #include <trace/events/ext4.h> -/** +typedef void ext4_update_sb_callback(struct ext4_super_block *es, + const void *arg); + +/* + * Superblock modification callback function for changing file system + * label + */ +static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg) +{ + /* Sanity check, this should never happen */ + BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX); + + memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX); +} + +static +int ext4_update_primary_sb(struct super_block *sb, handle_t *handle, + ext4_update_sb_callback func, + const void *arg) +{ + int err = 0; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh = sbi->s_sbh; + struct ext4_super_block *es = sbi->s_es; + + trace_ext4_update_sb(sb, bh->b_blocknr, 1); + + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, + bh, + EXT4_JTR_NONE); + if (err) + goto out_err; + + lock_buffer(bh); + func(es, arg); + ext4_superblock_csum_set(sb); + unlock_buffer(bh); + + if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) { + ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to " + "superblock detected"); + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + } + + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out_err; + err = sync_dirty_buffer(bh); +out_err: + ext4_std_error(sb, err); + return err; +} + +/* + * Update one backup superblock in the group 'grp' using the callback + * function 'func' and argument 'arg'. If the handle is NULL the + * modification is not journalled. + * + * Returns: 0 when no modification was done (no superblock in the group) + * 1 when the modification was successful + * <0 on error + */ +static int ext4_update_backup_sb(struct super_block *sb, + handle_t *handle, ext4_group_t grp, + ext4_update_sb_callback func, const void *arg) +{ + int err = 0; + ext4_fsblk_t sb_block; + struct buffer_head *bh; + unsigned long offset = 0; + struct ext4_super_block *es; + + if (!ext4_bg_has_super(sb, grp)) + return 0; + + /* + * For the group 0 there is always 1k padding, so we have + * either adjust offset, or sb_block depending on blocksize + */ + if (grp == 0) { + sb_block = 1 * EXT4_MIN_BLOCK_SIZE; + offset = do_div(sb_block, sb->s_blocksize); + } else { + sb_block = ext4_group_first_block_no(sb, grp); + offset = 0; + } + + trace_ext4_update_sb(sb, sb_block, handle ? 1 : 0); + + bh = ext4_sb_bread(sb, sb_block, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); + + if (handle) { + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, + bh, + EXT4_JTR_NONE); + if (err) + goto out_bh; + } + + es = (struct ext4_super_block *) (bh->b_data + offset); + lock_buffer(bh); + if (ext4_has_metadata_csum(sb) && + es->s_checksum != ext4_superblock_csum(sb, es)) { + ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " + "superblock %llu\n", sb_block); + unlock_buffer(bh); + err = -EFSBADCRC; + goto out_bh; + } + func(es, arg); + if (ext4_has_metadata_csum(sb)) + es->s_checksum = ext4_superblock_csum(sb, es); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if (err) + goto out_bh; + + if (handle) { + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out_bh; + } else { + BUFFER_TRACE(bh, "marking dirty"); + mark_buffer_dirty(bh); + } + err = sync_dirty_buffer(bh); + +out_bh: + brelse(bh); + ext4_std_error(sb, err); + return (err) ? err : 1; +} + +/* + * Update primary and backup superblocks using the provided function + * func and argument arg. + * + * Only the primary superblock and at most two backup superblock + * modifications are journalled; the rest is modified without journal. + * This is safe because e2fsck will re-write them if there is a problem, + * and we're very unlikely to ever need more than two backups. + */ +static +int ext4_update_superblocks_fn(struct super_block *sb, + ext4_update_sb_callback func, + const void *arg) +{ + handle_t *handle; + ext4_group_t ngroups; + unsigned int three = 1; + unsigned int five = 5; + unsigned int seven = 7; + int err = 0, ret, i; + ext4_group_t grp, primary_grp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + /* + * We can't update superblocks while the online resize is running + */ + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, + &sbi->s_ext4_flags)) { + ext4_msg(sb, KERN_ERR, "Can't modify superblock while" + "performing online resize"); + return -EBUSY; + } + + /* + * We're only going to update primary superblock and two + * backup superblocks in this transaction. + */ + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out; + } + + /* Update primary superblock */ + err = ext4_update_primary_sb(sb, handle, func, arg); + if (err) { + ext4_msg(sb, KERN_ERR, "Failed to update primary " + "superblock"); + goto out_journal; + } + + primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr); + ngroups = ext4_get_groups_count(sb); + + /* + * Update backup superblocks. We have to start from group 0 + * because it might not be where the primary superblock is + * if the fs is mounted with -o sb=<backup_sb_block> + */ + i = 0; + grp = 0; + while (grp < ngroups) { + /* Skip primary superblock */ + if (grp == primary_grp) + goto next_grp; + + ret = ext4_update_backup_sb(sb, handle, grp, func, arg); + if (ret < 0) { + /* Ignore bad checksum; try to update next sb */ + if (ret == -EFSBADCRC) + goto next_grp; + err = ret; + goto out_journal; + } + + i += ret; + if (handle && i > 1) { + /* + * We're only journalling primary superblock and + * two backup superblocks; the rest is not + * journalled. + */ + err = ext4_journal_stop(handle); + if (err) + goto out; + handle = NULL; + } +next_grp: + grp = ext4_list_backups(sb, &three, &five, &seven); + } + +out_journal: + if (handle) { + ret = ext4_journal_stop(handle); + if (ret && !err) + err = ret; + } +out: + clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags); + smp_mb__after_atomic(); + return err ? err : 0; +} + +/* * Swap memory between @a and @b for @len bytes. * * @a: pointer to first memory area @@ -48,7 +290,7 @@ static void memswap(void *a, void *b, size_t len) } } -/** +/* * Swap i_data and associated attributes between @inode1 and @inode2. * This function is used for the primary swap between inode1 and inode2 * and also to revert this primary swap in case of errors. @@ -102,7 +344,7 @@ void ext4_reset_inode_seed(struct inode *inode) ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen)); } -/** +/* * Swap the information from the given @inode and the inode * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other * important fields of the inodes. @@ -169,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb, err = -EINVAL; goto err_out; } - ext4_fc_start_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle); /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); @@ -252,7 +494,6 @@ revert: err_out1: ext4_journal_stop(handle); - ext4_fc_stop_ineligible(sb); ext4_double_up_write_data_sem(inode, inode_bl); err_out: @@ -743,7 +984,6 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns, u32 flags = fa->flags; int err = -EOPNOTSUPP; - ext4_fc_start_update(inode); if (flags & ~EXT4_FL_USER_VISIBLE) goto out; @@ -764,7 +1004,6 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns, goto out; err = ext4_ioctl_setproject(inode, fa->fsx_projid); out: - ext4_fc_stop_update(inode); return err; } @@ -850,6 +1089,64 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg) return err; } +static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label) +{ + size_t len; + int ret = 0; + char new_label[EXT4_LABEL_MAX + 1]; + struct super_block *sb = file_inode(filp)->i_sb; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* + * Copy the maximum length allowed for ext4 label with one more to + * find the required terminating null byte in order to test the + * label length. The on disk label doesn't need to be null terminated. + */ + if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1)) + return -EFAULT; + + len = strnlen(new_label, EXT4_LABEL_MAX + 1); + if (len > EXT4_LABEL_MAX) + return -EINVAL; + + /* + * Clear the buffer after the new label + */ + memset(new_label + len, 0, EXT4_LABEL_MAX - len); + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = ext4_update_superblocks_fn(sb, ext4_sb_setlabel, new_label); + + mnt_drop_write_file(filp); + return ret; +} + +static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label) +{ + char label[EXT4_LABEL_MAX + 1]; + + /* + * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because + * FSLABEL_MAX must include terminating null byte, while s_volume_name + * does not have to. + */ + BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX); + + memset(label, 0, sizeof(label)); + lock_buffer(sbi->s_sbh); + strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX); + unlock_buffer(sbi->s_sbh); + + if (copy_to_user(user_label, label, sizeof(label))) + return -EFAULT; + return 0; +} + static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1076,7 +1373,7 @@ mext_out: err = ext4_resize_fs(sb, n_blocks_count); if (EXT4_SB(sb)->s_journal) { - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); @@ -1117,8 +1414,6 @@ resizefs_out: sizeof(range))) return -EFAULT; - range.minlen = max((unsigned int)range.minlen, - q->limits.discard_granularity); ret = ext4_trim_fs(sb, &range); if (ret < 0) return ret; @@ -1266,6 +1561,13 @@ resizefs_out: case EXT4_IOC_CHECKPOINT: return ext4_ioctl_checkpoint(filp, arg); + case FS_IOC_GETFSLABEL: + return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg); + + case FS_IOC_SETFSLABEL: + return ext4_ioctl_setlabel(filp, + (const void __user *)arg); + default: return -ENOTTY; } @@ -1273,13 +1575,7 @@ resizefs_out: long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - long ret; - - ext4_fc_start_update(file_inode(filp)); - ret = __ext4_ioctl(filp, cmd, arg); - ext4_fc_stop_update(file_inode(filp)); - - return ret; + return __ext4_ioctl(filp, cmd, arg); } #ifdef CONFIG_COMPAT @@ -1347,6 +1643,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_GETSTATE: case EXT4_IOC_GET_ES_CACHE: case EXT4_IOC_CHECKPOINT: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 215b7068f548..252c168454c7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1000,7 +1000,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac) return 0; if (ac->ac_criteria >= 2) return 0; - if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) + if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) return 0; return 1; } @@ -1689,7 +1689,7 @@ static int mb_test_and_clear_bits(void *bm, int cur, int len) return zero_bit; } -void ext4_set_bits(void *bm, int cur, int len) +void mb_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1996,7 +1996,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info); - ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); + mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -2834,7 +2834,7 @@ out: static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) @@ -2845,7 +2845,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; ++*pos; @@ -2857,7 +2857,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err, buddy_loaded = 0; @@ -2985,7 +2985,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos) __acquires(&EXT4_SB(sb)->s_mb_rb_lock) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; read_lock(&EXT4_SB(sb)->s_mb_rb_lock); @@ -2998,7 +2998,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock) static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; ++*pos; @@ -3010,7 +3010,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long position = ((unsigned long) v); struct ext4_group_info *grp; @@ -3058,7 +3058,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v) __releases(&EXT4_SB(sb)->s_mb_rb_lock) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); read_unlock(&EXT4_SB(sb)->s_mb_rb_lock); } @@ -3825,7 +3825,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, * We leak some of the blocks here. */ ext4_lock_group(sb, ac->ac_b_ex.fe_group); - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -3844,7 +3844,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); if (ext4_has_group_desc_csum(sb) && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { @@ -3899,69 +3899,103 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, clen, err; + int i, err; int already; + unsigned int clen, clen_changed, thisgrp_len; - clen = EXT4_B2C(sbi, len); + while (len > 0) { + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto out_err; - } + /* + * Check to see if we are freeing blocks across a group + * boundary. + * In case of flex_bg, this can happen that (block, len) may + * span across more than one group. In that case we need to + * get the corresponding group metadata to work with. + * For this we have goto again loop. + */ + thisgrp_len = min_t(unsigned int, (unsigned int)len, + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); + clen = EXT4_NUM_B2C(sbi, thisgrp_len); + + if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) { + ext4_error(sb, "Marking blocks in system zone - " + "Block = %llu, len = %u", + block, thisgrp_len); + bitmap_bh = NULL; + break; + } - err = -EIO; - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - goto out_err; + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; + break; + } - ext4_lock_group(sb, group); - already = 0; - for (i = 0; i < clen; i++) - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) - already++; + err = -EIO; + gdp = ext4_get_group_desc(sb, group, &gdp_bh); + if (!gdp) + break; - if (state) - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); - else - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, - group, gdp)); - } - if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen + already; - else - clen = ext4_free_group_clusters(sb, gdp) + clen - already; + ext4_lock_group(sb, group); + already = 0; + for (i = 0; i < clen; i++) + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == + !state) + already++; + + clen_changed = clen - already; + if (state) + mb_set_bits(bitmap_bh->b_data, blkoff, clen); + else + mb_clear_bits(bitmap_bh->b_data, blkoff, clen); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + } + if (state) + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; + else + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; - ext4_free_group_clusters_set(sb, gdp, clen); - ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); + ext4_free_group_clusters_set(sb, gdp, clen); + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); - ext4_unlock_group(sb, group); + ext4_unlock_group(sb, group); - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group); + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); - atomic64_sub(len, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, &fg->free_clusters); + + } + + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); + if (err) + break; + sync_dirty_buffer(bitmap_bh); + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); + sync_dirty_buffer(gdp_bh); + if (err) + break; + + block += thisgrp_len; + len -= thisgrp_len; + brelse(bitmap_bh); + BUG_ON(len < 0); } - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); if (err) - goto out_err; - sync_dirty_buffer(bitmap_bh); - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(gdp_bh); - -out_err: - brelse(bitmap_bh); + brelse(bitmap_bh); } /* @@ -4433,7 +4467,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, efd_node); - ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); + mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); n = rb_next(n); } return; @@ -4474,7 +4508,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - ext4_set_bits(bitmap, start, len); + mb_set_bits(bitmap, start, len); preallocated += len; } mb_debug(sb, "preallocated %d for group %u\n", preallocated, group); @@ -4814,7 +4848,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, */ static noinline_for_stack int ext4_mb_discard_group_preallocations(struct super_block *sb, - ext4_group_t group, int needed) + ext4_group_t group, int *busy) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; @@ -4822,8 +4856,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct list_head list; struct ext4_buddy e4b; int err; - int busy = 0; - int free, free_total = 0; + int free = 0; mb_debug(sb, "discard preallocation for group %u\n", group); if (list_empty(&grp->bb_prealloc_list)) @@ -4846,19 +4879,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, goto out_dbg; } - if (needed == 0) - needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; - INIT_LIST_HEAD(&list); -repeat: - free = 0; ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, &grp->bb_prealloc_list, pa_group_list) { spin_lock(&pa->pa_lock); if (atomic_read(&pa->pa_count)) { spin_unlock(&pa->pa_lock); - busy = 1; + *busy = 1; continue; } if (pa->pa_deleted) { @@ -4898,22 +4926,13 @@ repeat: call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - free_total += free; - - /* if we still need more blocks and some PAs were used, try again */ - if (free_total < needed && busy) { - ext4_unlock_group(sb, group); - cond_resched(); - busy = 0; - goto repeat; - } ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); put_bh(bitmap_bh); out_dbg: mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n", - free_total, group, grp->bb_free); - return free_total; + free, group, grp->bb_free); + return free; } /* @@ -5455,13 +5474,24 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; - int freed = 0; + int freed = 0, busy = 0; + int retry = 0; trace_ext4_mb_discard_preallocations(sb, needed); + + if (needed == 0) + needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; + repeat: for (i = 0; i < ngroups && needed > 0; i++) { - ret = ext4_mb_discard_group_preallocations(sb, i, needed); + ret = ext4_mb_discard_group_preallocations(sb, i, &busy); freed += ret; needed -= ret; + cond_resched(); + } + + if (needed > 0 && busy && ++retry < 3) { + busy = 0; + goto repeat; } return freed; @@ -5757,7 +5787,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, struct super_block *sb = ar->inode->i_sb; ext4_group_t group; ext4_grpblk_t blkoff; - int i = sb->s_blocksize; + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; ext4_fsblk_t goal, block; struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -5779,19 +5810,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, ext4_get_group_no_and_offset(sb, max(ext4_group_first_block_no(sb, group), goal), NULL, &blkoff); - i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize, + while (1) { + i = mb_find_next_zero_bit(bitmap_bh->b_data, max, blkoff); + if (i >= max) + break; + if (ext4_fc_replay_check_excluded(sb, + ext4_group_first_block_no(sb, group) + i)) { + blkoff = i + 1; + } else + break; + } brelse(bitmap_bh); - if (i >= sb->s_blocksize) - continue; - if (ext4_fc_replay_check_excluded(sb, - ext4_group_first_block_no(sb, group) + i)) - continue; - break; + if (i < max) + break; } - if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize) + if (group >= ext4_get_groups_count(sb) || i >= max) { + *errp = -ENOSPC; return 0; + } block = ext4_group_first_block_no(sb, group) + i; ext4_mb_mark_bb(sb, block, 1, 1); @@ -5842,17 +5880,17 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, } /** - * ext4_free_blocks() -- Free given blocks and update quota + * ext4_mb_clear_bb() -- helper function for freeing blocks. + * Used by ext4_free_blocks() * @handle: handle for this transaction * @inode: inode - * @bh: optional buffer of the block to be freed * @block: starting physical block to be freed * @count: number of blocks to be freed * @flags: flags used by ext4_free_blocks */ -void ext4_free_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t block, - unsigned long count, int flags) +static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, + int flags) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; @@ -5869,80 +5907,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); - if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); - return; - } - - might_sleep(); - if (bh) { - if (block) - BUG_ON(block != bh->b_blocknr); - else - block = bh->b_blocknr; - } - - if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && - !ext4_inode_block_valid(inode, block, count)) { - ext4_error(sb, "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); - goto error_return; - } - - ext4_debug("freeing block %llu\n", block); - trace_ext4_free_blocks(inode, block, count, flags); - - if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - BUG_ON(count > 1); - - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, - inode, bh, block); - } - - /* - * If the extent to be freed does not begin on a cluster - * boundary, we need to deal with partial clusters at the - * beginning and end of the extent. Normally we will free - * blocks at the beginning or the end unless we are explicitly - * requested to avoid doing so. - */ - overflow = EXT4_PBLK_COFF(sbi, block); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { - overflow = sbi->s_cluster_ratio - overflow; - block += overflow; - if (count > overflow) - count -= overflow; - else - return; - } else { - block -= overflow; - count += overflow; - } - } - overflow = EXT4_LBLK_COFF(sbi, count); - if (overflow) { - if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { - if (count > overflow) - count -= overflow; - else - return; - } else - count += sbi->s_cluster_ratio - overflow; - } - - if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { - int i; - int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; - - for (i = 0; i < count; i++) { - cond_resched(); - if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); - ext4_forget(handle, is_metadata, inode, bh, block + i); - } - } - do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -5973,13 +5937,7 @@ do_more: goto error_return; } - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || - in_range(ext4_inode_bitmap(sb, gdp), block, count) || - in_range(block, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group)) { - + if (!ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ @@ -6050,7 +6008,7 @@ do_more: NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" - " group:%d block:%d count:%lu failed" + " group:%u block:%d count:%lu failed" " with %d", block_group, bit, count, err); } else @@ -6111,6 +6069,103 @@ error_return: } /** + * ext4_free_blocks() -- Free given blocks and update quota + * @handle: handle for this transaction + * @inode: inode + * @bh: optional buffer of the block to be freed + * @block: starting physical block to be freed + * @count: number of blocks to be freed + * @flags: flags used by ext4_free_blocks + */ +void ext4_free_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags) +{ + struct super_block *sb = inode->i_sb; + unsigned int overflow; + struct ext4_sb_info *sbi; + + sbi = EXT4_SB(sb); + + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + ext4_free_blocks_simple(inode, block, count); + return; + } + + might_sleep(); + if (bh) { + if (block) + BUG_ON(block != bh->b_blocknr); + else + block = bh->b_blocknr; + } + + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_inode_block_valid(inode, block, count)) { + ext4_error(sb, "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); + return; + } + + ext4_debug("freeing block %llu\n", block); + trace_ext4_free_blocks(inode, block, count, flags); + + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + BUG_ON(count > 1); + + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block); + } + + /* + * If the extent to be freed does not begin on a cluster + * boundary, we need to deal with partial clusters at the + * beginning and end of the extent. Normally we will free + * blocks at the beginning or the end unless we are explicitly + * requested to avoid doing so. + */ + overflow = EXT4_PBLK_COFF(sbi, block); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { + overflow = sbi->s_cluster_ratio - overflow; + block += overflow; + if (count > overflow) + count -= overflow; + else + return; + } else { + block -= overflow; + count += overflow; + } + } + overflow = EXT4_LBLK_COFF(sbi, count); + if (overflow) { + if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { + if (count > overflow) + count -= overflow; + else + return; + } else + count += sbi->s_cluster_ratio - overflow; + } + + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + int i; + int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA; + + for (i = 0; i < count; i++) { + cond_resched(); + if (is_metadata) + bh = sb_find_get_block(inode->i_sb, block + i); + ext4_forget(handle, is_metadata, inode, bh, block + i); + } + } + + ext4_mb_clear_bb(handle, inode, block, count, flags); + return; +} + +/** * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block @@ -6166,11 +6221,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, goto error_return; } - if (in_range(ext4_block_bitmap(sb, desc), block, count) || - in_range(ext4_inode_bitmap(sb, desc), block, count) || - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, desc), - sbi->s_itb_per_group)) { + if (!ext4_sb_block_valid(sb, NULL, block, count)) { ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); @@ -6373,7 +6424,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || - minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) { + minblocks < EXT4_SB(sb)->s_last_trim_minblks) { ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); if (ret >= 0) EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); @@ -6404,6 +6455,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; @@ -6422,6 +6474,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start >= max_blks || range->len < sb->s_blocksize) return -EINVAL; + /* No point to try to trim less than discard granularity */ + if (range->minlen < q->limits.discard_granularity) { + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + q->limits.discard_granularity >> sb->s_blocksize_bits); + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } if (end >= max_blks) end = max_blks - 1; if (end <= first_data_blk) @@ -6474,7 +6533,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } if (!ret) - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); + EXT4_SB(sb)->s_last_trim_minblks = minlen; out: range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 7e0b4f81c6c0..7a5353a8cfd7 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -437,12 +437,12 @@ int ext4_ext_migrate(struct inode *inode) percpu_down_write(&sbi->s_writepages_rwsem); /* - * Worst case we can touch the allocation bitmaps, a bgd - * block, and a block to link in the orphan list. We do need - * need to worry about credits for modifying the quota inode. + * Worst case we can touch the allocation bitmaps and a block + * group descriptor block. We do need need to worry about + * credits for modifying the quota inode. */ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, - 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); + 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); @@ -459,6 +459,13 @@ int ext4_ext_migrate(struct inode *inode) ext4_journal_stop(handle); goto out_unlock; } + /* + * Use the correct seed for checksum (i.e. the seed from 'inode'). This + * is so that the metadata blocks will have the correct checksum after + * the migration. + */ + ei = EXT4_I(inode); + EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; i_size_write(tmp_inode, i_size_read(inode)); /* * Set the i_nlink to zero so it will be deleted later @@ -467,7 +474,6 @@ int ext4_ext_migrate(struct inode *inode) clear_nlink(tmp_inode); ext4_ext_tree_init(handle, tmp_inode); - ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); /* @@ -479,7 +485,7 @@ int ext4_ext_migrate(struct inode *inode) * when we add extents we extent the journal */ /* - * Even though we take i_mutex we can still cause block + * Even though we take i_rwsem we can still cause block * allocation via mmap write to holes. If we have allocated * new blocks we fail migrate. New block allocation will * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated @@ -492,17 +498,10 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) { - /* - * It is impossible to update on-disk structures without - * a handle, so just rollback in-core changes and live other - * work to orphan_list_cleanup() - */ - ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); goto out_tmp_inode; } - ei = EXT4_I(inode); i_data = ei->i_data; memset(&lb, 0, sizeof(lb)); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 64a579734f93..95aa212f0863 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -632,7 +632,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, /* Check hole before the start pos */ if (cur_blk + cur_len - 1 < o_start) { if (next_blk == EXT_MAX_BLOCKS) { - o_start = o_end; ret = -ENODATA; goto out; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 52c9bd154122..e37da8d5cd0c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1317,7 +1317,7 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) dx_set_count(entries, count + 1); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Test whether a case-insensitive directory entry matches the filename * being searched for. If quick is set, assume the name being looked up @@ -1428,7 +1428,7 @@ static bool ext4_match(struct inode *parent, f.crypto_buf = fname->crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) && (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { if (fname->cf_name.name) { @@ -1800,7 +1800,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi } } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (!inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as @@ -2308,7 +2308,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; @@ -2997,14 +2997,14 @@ bool ext4_empty_dir(struct inode *inode) if (inode->i_size < ext4_dir_rec_len(1, NULL) + ext4_dir_rec_len(2, NULL)) { EXT4_ERROR_INODE(inode, "invalid size"); - return true; + return false; } /* The first directory block must not be a hole, * so treat it as DIRENT_HTREE */ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) - return true; + return false; de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, @@ -3012,7 +3012,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); - return true; + return false; } offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); @@ -3021,7 +3021,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); - return true; + return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { @@ -3035,7 +3035,7 @@ bool ext4_empty_dir(struct inode *inode) continue; } if (IS_ERR(bh)) - return true; + return false; } de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); @@ -3126,7 +3126,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) ext4_fc_track_unlink(handle, dentry); retval = ext4_mark_inode_dirty(handle, dir); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the @@ -3231,7 +3231,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); if (!retval) ext4_fc_track_unlink(handle, dentry); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the @@ -3889,14 +3889,21 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, * dirents in directories. */ ext4_fc_mark_ineligible(old.inode->i_sb, - EXT4_FC_REASON_RENAME_DIR); + EXT4_FC_REASON_RENAME_DIR, handle); } else { + struct super_block *sb = old.inode->i_sb; + if (new.inode) ext4_fc_track_unlink(handle, new.dentry); - __ext4_fc_track_link(handle, old.inode, new.dentry); - __ext4_fc_track_unlink(handle, old.inode, old.dentry); - if (whiteout) - __ext4_fc_track_create(handle, whiteout, old.dentry); + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && + !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) && + !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) { + __ext4_fc_track_link(handle, old.inode, new.dentry); + __ext4_fc_track_unlink(handle, old.inode, old.dentry); + if (whiteout) + __ext4_fc_track_create(handle, whiteout, + old.dentry); + } } if (new.inode) { @@ -4049,7 +4056,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(retval)) goto end_rename; ext4_fc_mark_ineligible(new.inode->i_sb, - EXT4_FC_REASON_CROSS_RENAME); + EXT4_FC_REASON_CROSS_RENAME, handle); if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 53adc8f570a3..7de0612eb42d 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -93,7 +93,7 @@ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode) * At filesystem recovery time, we walk this list deleting unlinked * inodes and truncating linked inodes in ext4_orphan_cleanup(). * - * Orphan list manipulation functions must be called under i_mutex unless + * Orphan list manipulation functions must be called under i_rwsem unless * we are just creating the inode or deleting it. */ int ext4_orphan_add(handle_t *handle, struct inode *inode) @@ -119,7 +119,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* * Orphan handling is only valid for files with data blocks * being truncated, or files being unlinked. Note that we either - * hold i_mutex, or the inode can not be referenced from outside, + * hold i_rwsem, or the inode can not be referenced from outside, * so i_nlink should not be bumped due to race */ ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 9cb261714991..495ce59fb4ad 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -24,7 +24,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> -#include <linux/backing-dev.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -323,10 +323,9 @@ static void ext4_end_bio(struct bio *bio) { ext4_io_end_t *io_end = bio->bi_private; sector_t bi_sector = bio->bi_iter.bi_sector; - char b[BDEVNAME_SIZE]; - if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n", - bio_devname(bio, b), + if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n", + bio->bi_bdev, (long long) bio->bi_iter.bi_sector, (unsigned) bio_sectors(bio), bio->bi_status)) { @@ -372,10 +371,8 @@ void ext4_io_submit(struct ext4_io_submit *io) struct bio *bio = io->io_bio; if (bio) { - int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? - REQ_SYNC : 0; - io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint; - bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); + if (io->io_wbc->sync_mode == WB_SYNC_ALL) + io->io_bio->bi_opf |= REQ_SYNC; submit_bio(io->io_bio); } io->io_bio = NULL; @@ -398,10 +395,9 @@ static void io_submit_init_bio(struct ext4_io_submit *io, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). */ - bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); + bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_set_dev(bio, bh->b_bdev); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; @@ -421,10 +417,8 @@ static void io_submit_add_bh(struct ext4_io_submit *io, submit_and_retry: ext4_io_submit(io); } - if (io->io_bio == NULL) { + if (io->io_bio == NULL) io_submit_init_bio(io, bh); - io->io_bio->bi_write_hint = inode->i_write_hint; - } ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); if (ret != bh->b_size) goto submit_and_retry; @@ -523,12 +517,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, ret = PTR_ERR(bounce_page); if (ret == -ENOMEM && (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { - gfp_flags = GFP_NOFS; + gfp_t new_gfp_flags = GFP_NOFS; if (io->io_bio) ext4_io_submit(io); else - gfp_flags |= __GFP_NOFAIL; - congestion_wait(BLK_RW_ASYNC, HZ/50); + new_gfp_flags |= __GFP_NOFAIL; + memalloc_retry_wait(gfp_flags); + gfp_flags = new_gfp_flags; goto retry_encrypt; } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 3db923403505..1aa26d6634fc 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -43,7 +43,6 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> -#include <linux/cleancache.h> #include "ext4.h" @@ -350,11 +349,6 @@ int ext4_mpage_readpages(struct inode *inode, } else if (fully_mapped) { SetPageMappedToDisk(page); } - if (fully_mapped && blocks_per_page == 1 && - !PageUptodate(page) && cleancache_get_page(page) == 0) { - SetPageUptodate(page); - goto confused; - } /* * This page will go to BIO. Do we need to send this @@ -371,15 +365,15 @@ int ext4_mpage_readpages(struct inode *inode, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset(). */ - bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages)); + bio = bio_alloc(bdev, bio_max_segs(nr_pages), + REQ_OP_READ, GFP_KERNEL); fscrypt_set_bio_crypt_ctx(bio, inode, next_block, GFP_KERNEL); ext4_set_bio_post_read_ctx(bio, inode, page->index); - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, - rac ? REQ_RAHEAD : 0); + if (rac) + bio->bi_opf |= REQ_RAHEAD; } length = first_hole << blkbits; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b63cb88ccdae..90a941d20dff 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -14,6 +14,7 @@ #include <linux/errno.h> #include <linux/slab.h> +#include <linux/jiffies.h> #include "ext4_jbd2.h" @@ -483,7 +484,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", first_cluster, first_cluster - start, count2); - ext4_set_bits(bh->b_data, first_cluster - start, count2); + mb_set_bits(bh->b_data, first_cluster - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); @@ -632,7 +633,7 @@ handle_bb: if (overhead != 0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, + mb_set_bits(bh->b_data, 0, EXT4_NUM_B2C(sbi, overhead)); } ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), @@ -717,12 +718,23 @@ out: * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... */ -static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, - unsigned *five, unsigned *seven) +unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three, + unsigned int *five, unsigned int *seven) { - unsigned *min = three; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + unsigned int *min = three; int mult = 3; - unsigned ret; + unsigned int ret; + + if (ext4_has_feature_sparse_super2(sb)) { + do { + if (*min > 2) + return UINT_MAX; + ret = le32_to_cpu(es->s_backup_bgs[*min - 1]); + *min += 1; + } while (!ret); + return ret; + } if (!ext4_has_feature_sparse_super(sb)) { ret = *min; @@ -2089,7 +2101,7 @@ retry: */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { - if (jiffies - last_update_time > HZ * 10) { + if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time) ext4_msg(sb, KERN_INFO, "resized to %llu blocks", diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4e33b5eca694..81749eaddf4c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -39,7 +39,6 @@ #include <linux/log2.h> #include <linux/crc16.h> #include <linux/dax.h> -#include <linux/cleancache.h> #include <linux/uaccess.h> #include <linux/iversion.h> #include <linux/unicode.h> @@ -47,6 +46,8 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/fsnotify.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include "ext4.h" #include "ext4_extents.h" /* Needed for trace points definition */ @@ -73,12 +74,9 @@ static int ext4_mark_recovery_complete(struct super_block *sb, static int ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); -static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static int ext4_freeze(struct super_block *sb); -static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data); static inline int ext2_feature_set_ok(struct super_block *sb); static inline int ext3_feature_set_ok(struct super_block *sb); static void ext4_destroy_lazyinit_thread(void); @@ -86,6 +84,16 @@ static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); static struct inode *ext4_get_journal_inode(struct super_block *sb, unsigned int journal_inum); +static int ext4_validate_options(struct fs_context *fc); +static int ext4_check_opt_consistency(struct fs_context *fc, + struct super_block *sb); +static int ext4_apply_options(struct fs_context *fc, struct super_block *sb); +static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param); +static int ext4_get_tree(struct fs_context *fc); +static int ext4_reconfigure(struct fs_context *fc); +static void ext4_fc_free(struct fs_context *fc); +static int ext4_init_fs_context(struct fs_context *fc); +static const struct fs_parameter_spec ext4_param_specs[]; /* * Lock ordering @@ -113,13 +121,22 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, * transaction start -> page lock(s) -> i_data_sem (rw) */ +static const struct fs_context_operations ext4_context_ops = { + .parse_param = ext4_parse_param, + .get_tree = ext4_get_tree, + .reconfigure = ext4_reconfigure, + .free = ext4_fc_free, +}; + + #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static struct file_system_type ext2_fs_type = { - .owner = THIS_MODULE, - .name = "ext2", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "ext2", + .init_fs_context = ext4_init_fs_context, + .parameters = ext4_param_specs, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("ext2"); MODULE_ALIAS("ext2"); @@ -130,11 +147,12 @@ MODULE_ALIAS("ext2"); static struct file_system_type ext3_fs_type = { - .owner = THIS_MODULE, - .name = "ext3", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "ext3", + .init_fs_context = ext4_init_fs_context, + .parameters = ext4_param_specs, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("ext3"); MODULE_ALIAS("ext3"); @@ -260,8 +278,8 @@ static int ext4_verify_csum_type(struct super_block *sb, return es->s_checksum_type == EXT4_CRC32C_CHKSUM; } -static __le32 ext4_superblock_csum(struct super_block *sb, - struct ext4_super_block *es) +__le32 ext4_superblock_csum(struct super_block *sb, + struct ext4_super_block *es) { struct ext4_sb_info *sbi = EXT4_SB(sb); int offset = offsetof(struct ext4_super_block, s_checksum); @@ -912,14 +930,20 @@ void __ext4_msg(struct super_block *sb, struct va_format vaf; va_list args; - atomic_inc(&EXT4_SB(sb)->s_msg_count); - if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) - return; + if (sb) { + atomic_inc(&EXT4_SB(sb)->s_msg_count); + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), + "EXT4-fs")) + return; + } va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); + if (sb) + printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); + else + printk("%sEXT4-fs: %pV\n", prefix, &vaf); va_end(args); } @@ -1277,7 +1301,7 @@ static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif kfree(sbi); @@ -1292,7 +1316,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) { struct ext4_inode_info *ei; - ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); + ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; @@ -1647,7 +1671,6 @@ static const struct super_operations ext4_sops = { .freeze_fs = ext4_freeze, .unfreeze_fs = ext4_unfreeze, .statfs = ext4_statfs, - .remount_fs = ext4_remount, .show_options = ext4_show_options, #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, @@ -1665,7 +1688,7 @@ static const struct export_operations ext4_export_ops = { enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, - Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, + Opt_resgid, Opt_resuid, Opt_sb, Opt_nouid32, Opt_debug, Opt_removed, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, @@ -1674,152 +1697,169 @@ enum { Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption, Opt_inlinecrypt, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, + Opt_usrjquota, Opt_grpjquota, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, - Opt_nowarn_on_error, Opt_mblk_io_submit, - Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize, + Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan, + Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type, #ifdef CONFIG_EXT4_DEBUG Opt_fc_debug_max_replay, Opt_fc_debug_force #endif }; -static const match_table_t tokens = { - {Opt_bsd_df, "bsddf"}, - {Opt_minix_df, "minixdf"}, - {Opt_grpid, "grpid"}, - {Opt_grpid, "bsdgroups"}, - {Opt_nogrpid, "nogrpid"}, - {Opt_nogrpid, "sysvgroups"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_sb, "sb=%u"}, - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_nouid32, "nouid32"}, - {Opt_debug, "debug"}, - {Opt_removed, "oldalloc"}, - {Opt_removed, "orlov"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_noload, "norecovery"}, - {Opt_noload, "noload"}, - {Opt_removed, "nobh"}, - {Opt_removed, "bh"}, - {Opt_commit, "commit=%u"}, - {Opt_min_batch_time, "min_batch_time=%u"}, - {Opt_max_batch_time, "max_batch_time=%u"}, - {Opt_journal_dev, "journal_dev=%u"}, - {Opt_journal_path, "journal_path=%s"}, - {Opt_journal_checksum, "journal_checksum"}, - {Opt_nojournal_checksum, "nojournal_checksum"}, - {Opt_journal_async_commit, "journal_async_commit"}, - {Opt_abort, "abort"}, - {Opt_data_journal, "data=journal"}, - {Opt_data_ordered, "data=ordered"}, - {Opt_data_writeback, "data=writeback"}, - {Opt_data_err_abort, "data_err=abort"}, - {Opt_data_err_ignore, "data_err=ignore"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_usrjquota, "usrjquota=%s"}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_grpquota, "grpquota"}, - {Opt_noquota, "noquota"}, - {Opt_quota, "quota"}, - {Opt_usrquota, "usrquota"}, - {Opt_prjquota, "prjquota"}, - {Opt_barrier, "barrier=%u"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_i_version, "i_version"}, - {Opt_dax, "dax"}, - {Opt_dax_always, "dax=always"}, - {Opt_dax_inode, "dax=inode"}, - {Opt_dax_never, "dax=never"}, - {Opt_stripe, "stripe=%u"}, - {Opt_delalloc, "delalloc"}, - {Opt_warn_on_error, "warn_on_error"}, - {Opt_nowarn_on_error, "nowarn_on_error"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, - {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"}, - {Opt_nodelalloc, "nodelalloc"}, - {Opt_removed, "mblk_io_submit"}, - {Opt_removed, "nomblk_io_submit"}, - {Opt_block_validity, "block_validity"}, - {Opt_noblock_validity, "noblock_validity"}, - {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, - {Opt_journal_ioprio, "journal_ioprio=%u"}, - {Opt_auto_da_alloc, "auto_da_alloc=%u"}, - {Opt_auto_da_alloc, "auto_da_alloc"}, - {Opt_noauto_da_alloc, "noauto_da_alloc"}, - {Opt_dioread_nolock, "dioread_nolock"}, - {Opt_dioread_lock, "nodioread_nolock"}, - {Opt_dioread_lock, "dioread_lock"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_init_itable, "init_itable=%u"}, - {Opt_init_itable, "init_itable"}, - {Opt_noinit_itable, "noinit_itable"}, -#ifdef CONFIG_EXT4_DEBUG - {Opt_fc_debug_force, "fc_debug_force"}, - {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"}, -#endif - {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, - {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_inlinecrypt, "inlinecrypt"}, - {Opt_nombcache, "nombcache"}, - {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ - {Opt_removed, "prefetch_block_bitmaps"}, - {Opt_no_prefetch_block_bitmaps, "no_prefetch_block_bitmaps"}, - {Opt_mb_optimize_scan, "mb_optimize_scan=%d"}, - {Opt_removed, "check=none"}, /* mount option from ext2/3 */ - {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ - {Opt_removed, "reservation"}, /* mount option from ext2/3 */ - {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ - {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ - {Opt_err, NULL}, +static const struct constant_table ext4_param_errors[] = { + {"continue", EXT4_MOUNT_ERRORS_CONT}, + {"panic", EXT4_MOUNT_ERRORS_PANIC}, + {"remount-ro", EXT4_MOUNT_ERRORS_RO}, + {} }; -static ext4_fsblk_t get_sb_block(void **data) -{ - ext4_fsblk_t sb_block; - char *options = (char *) *data; +static const struct constant_table ext4_param_data[] = { + {"journal", EXT4_MOUNT_JOURNAL_DATA}, + {"ordered", EXT4_MOUNT_ORDERED_DATA}, + {"writeback", EXT4_MOUNT_WRITEBACK_DATA}, + {} +}; - if (!options || strncmp(options, "sb=", 3) != 0) - return 1; /* Default location */ +static const struct constant_table ext4_param_data_err[] = { + {"abort", Opt_data_err_abort}, + {"ignore", Opt_data_err_ignore}, + {} +}; - options += 3; - /* TODO: use simple_strtoll with >32bit ext4 */ - sb_block = simple_strtoul(options, &options, 0); - if (*options && *options != ',') { - printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", - (char *) *data); - return 1; - } - if (*options == ',') - options++; - *data = (void *) options; +static const struct constant_table ext4_param_jqfmt[] = { + {"vfsold", QFMT_VFS_OLD}, + {"vfsv0", QFMT_VFS_V0}, + {"vfsv1", QFMT_VFS_V1}, + {} +}; - return sb_block; -} +static const struct constant_table ext4_param_dax[] = { + {"always", Opt_dax_always}, + {"inode", Opt_dax_inode}, + {"never", Opt_dax_never}, + {} +}; + +/* String parameter that allows empty argument */ +#define fsparam_string_empty(NAME, OPT) \ + __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) + +/* + * Mount option specification + * We don't use fsparam_flag_no because of the way we set the + * options and the way we show them in _ext4_show_options(). To + * keep the changes to a minimum, let's keep the negative options + * separate for now. + */ +static const struct fs_parameter_spec ext4_param_specs[] = { + fsparam_flag ("bsddf", Opt_bsd_df), + fsparam_flag ("minixdf", Opt_minix_df), + fsparam_flag ("grpid", Opt_grpid), + fsparam_flag ("bsdgroups", Opt_grpid), + fsparam_flag ("nogrpid", Opt_nogrpid), + fsparam_flag ("sysvgroups", Opt_nogrpid), + fsparam_u32 ("resgid", Opt_resgid), + fsparam_u32 ("resuid", Opt_resuid), + fsparam_u32 ("sb", Opt_sb), + fsparam_enum ("errors", Opt_errors, ext4_param_errors), + fsparam_flag ("nouid32", Opt_nouid32), + fsparam_flag ("debug", Opt_debug), + fsparam_flag ("oldalloc", Opt_removed), + fsparam_flag ("orlov", Opt_removed), + fsparam_flag ("user_xattr", Opt_user_xattr), + fsparam_flag ("nouser_xattr", Opt_nouser_xattr), + fsparam_flag ("acl", Opt_acl), + fsparam_flag ("noacl", Opt_noacl), + fsparam_flag ("norecovery", Opt_noload), + fsparam_flag ("noload", Opt_noload), + fsparam_flag ("bh", Opt_removed), + fsparam_flag ("nobh", Opt_removed), + fsparam_u32 ("commit", Opt_commit), + fsparam_u32 ("min_batch_time", Opt_min_batch_time), + fsparam_u32 ("max_batch_time", Opt_max_batch_time), + fsparam_u32 ("journal_dev", Opt_journal_dev), + fsparam_bdev ("journal_path", Opt_journal_path), + fsparam_flag ("journal_checksum", Opt_journal_checksum), + fsparam_flag ("nojournal_checksum", Opt_nojournal_checksum), + fsparam_flag ("journal_async_commit",Opt_journal_async_commit), + fsparam_flag ("abort", Opt_abort), + fsparam_enum ("data", Opt_data, ext4_param_data), + fsparam_enum ("data_err", Opt_data_err, + ext4_param_data_err), + fsparam_string_empty + ("usrjquota", Opt_usrjquota), + fsparam_string_empty + ("grpjquota", Opt_grpjquota), + fsparam_enum ("jqfmt", Opt_jqfmt, ext4_param_jqfmt), + fsparam_flag ("grpquota", Opt_grpquota), + fsparam_flag ("quota", Opt_quota), + fsparam_flag ("noquota", Opt_noquota), + fsparam_flag ("usrquota", Opt_usrquota), + fsparam_flag ("prjquota", Opt_prjquota), + fsparam_flag ("barrier", Opt_barrier), + fsparam_u32 ("barrier", Opt_barrier), + fsparam_flag ("nobarrier", Opt_nobarrier), + fsparam_flag ("i_version", Opt_i_version), + fsparam_flag ("dax", Opt_dax), + fsparam_enum ("dax", Opt_dax_type, ext4_param_dax), + fsparam_u32 ("stripe", Opt_stripe), + fsparam_flag ("delalloc", Opt_delalloc), + fsparam_flag ("nodelalloc", Opt_nodelalloc), + fsparam_flag ("warn_on_error", Opt_warn_on_error), + fsparam_flag ("nowarn_on_error", Opt_nowarn_on_error), + fsparam_u32 ("debug_want_extra_isize", + Opt_debug_want_extra_isize), + fsparam_flag ("mblk_io_submit", Opt_removed), + fsparam_flag ("nomblk_io_submit", Opt_removed), + fsparam_flag ("block_validity", Opt_block_validity), + fsparam_flag ("noblock_validity", Opt_noblock_validity), + fsparam_u32 ("inode_readahead_blks", + Opt_inode_readahead_blks), + fsparam_u32 ("journal_ioprio", Opt_journal_ioprio), + fsparam_u32 ("auto_da_alloc", Opt_auto_da_alloc), + fsparam_flag ("auto_da_alloc", Opt_auto_da_alloc), + fsparam_flag ("noauto_da_alloc", Opt_noauto_da_alloc), + fsparam_flag ("dioread_nolock", Opt_dioread_nolock), + fsparam_flag ("nodioread_nolock", Opt_dioread_lock), + fsparam_flag ("dioread_lock", Opt_dioread_lock), + fsparam_flag ("discard", Opt_discard), + fsparam_flag ("nodiscard", Opt_nodiscard), + fsparam_u32 ("init_itable", Opt_init_itable), + fsparam_flag ("init_itable", Opt_init_itable), + fsparam_flag ("noinit_itable", Opt_noinit_itable), +#ifdef CONFIG_EXT4_DEBUG + fsparam_flag ("fc_debug_force", Opt_fc_debug_force), + fsparam_u32 ("fc_debug_max_replay", Opt_fc_debug_max_replay), +#endif + fsparam_u32 ("max_dir_size_kb", Opt_max_dir_size_kb), + fsparam_flag ("test_dummy_encryption", + Opt_test_dummy_encryption), + fsparam_string ("test_dummy_encryption", + Opt_test_dummy_encryption), + fsparam_flag ("inlinecrypt", Opt_inlinecrypt), + fsparam_flag ("nombcache", Opt_nombcache), + fsparam_flag ("no_mbcache", Opt_nombcache), /* for backward compatibility */ + fsparam_flag ("prefetch_block_bitmaps", + Opt_removed), + fsparam_flag ("no_prefetch_block_bitmaps", + Opt_no_prefetch_block_bitmaps), + fsparam_s32 ("mb_optimize_scan", Opt_mb_optimize_scan), + fsparam_string ("check", Opt_removed), /* mount option from ext2/3 */ + fsparam_flag ("nocheck", Opt_removed), /* mount option from ext2/3 */ + fsparam_flag ("reservation", Opt_removed), /* mount option from ext2/3 */ + fsparam_flag ("noreservation", Opt_removed), /* mount option from ext2/3 */ + fsparam_u32 ("journal", Opt_removed), /* mount option from ext2/3 */ + {} +}; #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) #define DEFAULT_MB_OPTIMIZE_SCAN (-1) @@ -1828,90 +1868,22 @@ static const char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; -#ifdef CONFIG_QUOTA -static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - char *qname, *old_qname = get_qf_name(sb, sbi, qtype); - int ret = -1; - - if (sb_any_quota_loaded(sb) && !old_qname) { - ext4_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); - return -1; - } - if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_INFO, "Journaled quota options " - "ignored when QUOTA feature is enabled"); - return 1; - } - qname = match_strdup(args); - if (!qname) { - ext4_msg(sb, KERN_ERR, - "Not enough memory for storing quotafile name"); - return -1; - } - if (old_qname) { - if (strcmp(old_qname, qname) == 0) - ret = 1; - else - ext4_msg(sb, KERN_ERR, - "%s quota file already specified", - QTYPE2NAME(qtype)); - goto errout; - } - if (strchr(qname, '/')) { - ext4_msg(sb, KERN_ERR, - "quotafile must be on filesystem root"); - goto errout; - } - rcu_assign_pointer(sbi->s_qf_names[qtype], qname); - set_opt(sb, QUOTA); - return 1; -errout: - kfree(qname); - return ret; -} - -static int clear_qf_name(struct super_block *sb, int qtype) -{ - - struct ext4_sb_info *sbi = EXT4_SB(sb); - char *old_qname = get_qf_name(sb, sbi, qtype); - - if (sb_any_quota_loaded(sb) && old_qname) { - ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" - " when quota turned on"); - return -1; - } - rcu_assign_pointer(sbi->s_qf_names[qtype], NULL); - synchronize_rcu(); - kfree(old_qname); - return 1; -} -#endif - #define MOPT_SET 0x0001 #define MOPT_CLEAR 0x0002 #define MOPT_NOSUPPORT 0x0004 #define MOPT_EXPLICIT 0x0008 -#define MOPT_CLEAR_ERR 0x0010 -#define MOPT_GTE0 0x0020 #ifdef CONFIG_QUOTA #define MOPT_Q 0 -#define MOPT_QFMT 0x0040 +#define MOPT_QFMT 0x0010 #else #define MOPT_Q MOPT_NOSUPPORT #define MOPT_QFMT MOPT_NOSUPPORT #endif -#define MOPT_DATAJ 0x0080 -#define MOPT_NO_EXT2 0x0100 -#define MOPT_NO_EXT3 0x0200 +#define MOPT_NO_EXT2 0x0020 +#define MOPT_NO_EXT3 0x0040 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) -#define MOPT_STRING 0x0400 -#define MOPT_SKIP 0x0800 -#define MOPT_2 0x1000 +#define MOPT_SKIP 0x0080 +#define MOPT_2 0x0100 static const struct mount_opts { int token; @@ -1944,40 +1916,17 @@ static const struct mount_opts { EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, - {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, - {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, - {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, - {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, - MOPT_NO_EXT2}, - {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, - MOPT_NO_EXT2}, + {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2}, {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, - {Opt_commit, 0, MOPT_GTE0}, - {Opt_max_batch_time, 0, MOPT_GTE0}, - {Opt_min_batch_time, 0, MOPT_GTE0}, - {Opt_inode_readahead_blks, 0, MOPT_GTE0}, - {Opt_init_itable, 0, MOPT_GTE0}, - {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP}, - {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS, - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, - {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE, - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, - {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER, - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, - {Opt_stripe, 0, MOPT_GTE0}, - {Opt_resuid, 0, MOPT_GTE0}, - {Opt_resgid, 0, MOPT_GTE0}, - {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0}, - {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING}, - {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0}, - {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, - {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, - {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, - MOPT_NO_EXT2 | MOPT_DATAJ}, + {Opt_dax_type, 0, MOPT_EXT4_ONLY}, + {Opt_journal_dev, 0, MOPT_NO_EXT2}, + {Opt_journal_path, 0, MOPT_NO_EXT2}, + {Opt_journal_ioprio, 0, MOPT_NO_EXT2}, + {Opt_data, 0, MOPT_NO_EXT2}, {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, #ifdef CONFIG_EXT4_FS_POSIX_ACL @@ -1989,7 +1938,6 @@ static const struct mount_opts { #endif {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, - {Opt_debug_want_extra_isize, 0, MOPT_GTE0}, {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, @@ -2000,499 +1948,1005 @@ static const struct mount_opts { {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA), MOPT_CLEAR | MOPT_Q}, - {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING}, - {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING}, - {Opt_offusrjquota, 0, MOPT_Q}, - {Opt_offgrpjquota, 0, MOPT_Q}, - {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, - {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, - {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, - {Opt_max_dir_size_kb, 0, MOPT_GTE0}, - {Opt_test_dummy_encryption, 0, MOPT_STRING}, + {Opt_usrjquota, 0, MOPT_Q}, + {Opt_grpjquota, 0, MOPT_Q}, + {Opt_jqfmt, 0, MOPT_QFMT}, {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS, MOPT_SET}, - {Opt_mb_optimize_scan, EXT4_MOUNT2_MB_OPTIMIZE_SCAN, MOPT_GTE0}, #ifdef CONFIG_EXT4_DEBUG {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, - {Opt_fc_debug_max_replay, 0, MOPT_GTE0}, #endif {Opt_err, 0, 0} }; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) static const struct ext4_sb_encodings { __u16 magic; char *name; - char *version; + unsigned int version; } ext4_sb_encoding_map[] = { - {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"}, + {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)}, }; -static int ext4_sb_read_encoding(const struct ext4_super_block *es, - const struct ext4_sb_encodings **encoding, - __u16 *flags) +static const struct ext4_sb_encodings * +ext4_sb_read_encoding(const struct ext4_super_block *es) { __u16 magic = le16_to_cpu(es->s_encoding); int i; for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++) if (magic == ext4_sb_encoding_map[i].magic) - break; + return &ext4_sb_encoding_map[i]; - if (i >= ARRAY_SIZE(ext4_sb_encoding_map)) - return -EINVAL; - - *encoding = &ext4_sb_encoding_map[i]; - *flags = le16_to_cpu(es->s_encoding_flags); - - return 0; + return NULL; } #endif -static int ext4_set_test_dummy_encryption(struct super_block *sb, - const char *opt, - const substring_t *arg, - bool is_remount) +static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) { #ifdef CONFIG_FS_ENCRYPTION struct ext4_sb_info *sbi = EXT4_SB(sb); int err; - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. - */ - if (is_remount && !sbi->s_dummy_enc_policy.policy) { - ext4_msg(sb, KERN_WARNING, - "Can't set test_dummy_encryption on remount"); - return -1; - } - err = fscrypt_set_test_dummy_encryption(sb, arg->from, + err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_policy); if (err) { - if (err == -EEXIST) - ext4_msg(sb, KERN_WARNING, - "Can't change test_dummy_encryption on remount"); - else if (err == -EINVAL) - ext4_msg(sb, KERN_WARNING, - "Value of option \"%s\" is unrecognized", opt); - else - ext4_msg(sb, KERN_WARNING, - "Error processing option \"%s\" [%d]", - opt, err); - return -1; + ext4_msg(sb, KERN_WARNING, + "Error while setting test dummy encryption [%d]", err); + return err; } ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); -#else - ext4_msg(sb, KERN_WARNING, - "Test dummy encryption mount option ignored"); #endif - return 1; + return 0; } -struct ext4_parsed_options { - unsigned long journal_devnum; - unsigned int journal_ioprio; - int mb_optimize_scan; +#define EXT4_SPEC_JQUOTA (1 << 0) +#define EXT4_SPEC_JQFMT (1 << 1) +#define EXT4_SPEC_DATAJ (1 << 2) +#define EXT4_SPEC_SB_BLOCK (1 << 3) +#define EXT4_SPEC_JOURNAL_DEV (1 << 4) +#define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5) +#define EXT4_SPEC_DUMMY_ENCRYPTION (1 << 6) +#define EXT4_SPEC_s_want_extra_isize (1 << 7) +#define EXT4_SPEC_s_max_batch_time (1 << 8) +#define EXT4_SPEC_s_min_batch_time (1 << 9) +#define EXT4_SPEC_s_inode_readahead_blks (1 << 10) +#define EXT4_SPEC_s_li_wait_mult (1 << 11) +#define EXT4_SPEC_s_max_dir_size_kb (1 << 12) +#define EXT4_SPEC_s_stripe (1 << 13) +#define EXT4_SPEC_s_resuid (1 << 14) +#define EXT4_SPEC_s_resgid (1 << 15) +#define EXT4_SPEC_s_commit_interval (1 << 16) +#define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) +#define EXT4_SPEC_s_sb_block (1 << 18) +#define EXT4_SPEC_mb_optimize_scan (1 << 19) + +struct ext4_fs_context { + char *s_qf_names[EXT4_MAXQUOTAS]; + char *test_dummy_enc_arg; + int s_jquota_fmt; /* Format of quota to use */ +#ifdef CONFIG_EXT4_DEBUG + int s_fc_debug_max_replay; +#endif + unsigned short qname_spec; + unsigned long vals_s_flags; /* Bits to set in s_flags */ + unsigned long mask_s_flags; /* Bits changed in s_flags */ + unsigned long journal_devnum; + unsigned long s_commit_interval; + unsigned long s_stripe; + unsigned int s_inode_readahead_blks; + unsigned int s_want_extra_isize; + unsigned int s_li_wait_mult; + unsigned int s_max_dir_size_kb; + unsigned int journal_ioprio; + unsigned int vals_s_mount_opt; + unsigned int mask_s_mount_opt; + unsigned int vals_s_mount_opt2; + unsigned int mask_s_mount_opt2; + unsigned long vals_s_mount_flags; + unsigned long mask_s_mount_flags; + unsigned int opt_flags; /* MOPT flags */ + unsigned int spec; + u32 s_max_batch_time; + u32 s_min_batch_time; + kuid_t s_resuid; + kgid_t s_resgid; + ext4_fsblk_t s_sb_block; }; -static int handle_mount_opt(struct super_block *sb, char *opt, int token, - substring_t *args, struct ext4_parsed_options *parsed_opts, - int is_remount) +static void ext4_fc_free(struct fs_context *fc) { - struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_fs_context *ctx = fc->fs_private; + int i; + + if (!ctx) + return; + + for (i = 0; i < EXT4_MAXQUOTAS; i++) + kfree(ctx->s_qf_names[i]); + + kfree(ctx->test_dummy_enc_arg); + kfree(ctx); +} + +int ext4_init_fs_context(struct fs_context *fc) +{ + struct ext4_fs_context *ctx; + + ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + fc->fs_private = ctx; + fc->ops = &ext4_context_ops; + + return 0; +} + +#ifdef CONFIG_QUOTA +/* + * Note the name of the specified quota file. + */ +static int note_qf_name(struct fs_context *fc, int qtype, + struct fs_parameter *param) +{ + struct ext4_fs_context *ctx = fc->fs_private; + char *qname; + + if (param->size < 1) { + ext4_msg(NULL, KERN_ERR, "Missing quota name"); + return -EINVAL; + } + if (strchr(param->string, '/')) { + ext4_msg(NULL, KERN_ERR, + "quotafile must be on filesystem root"); + return -EINVAL; + } + if (ctx->s_qf_names[qtype]) { + if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) { + ext4_msg(NULL, KERN_ERR, + "%s quota file already specified", + QTYPE2NAME(qtype)); + return -EINVAL; + } + return 0; + } + + qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); + if (!qname) { + ext4_msg(NULL, KERN_ERR, + "Not enough memory for storing quotafile name"); + return -ENOMEM; + } + ctx->s_qf_names[qtype] = qname; + ctx->qname_spec |= 1 << qtype; + ctx->spec |= EXT4_SPEC_JQUOTA; + return 0; +} + +/* + * Clear the name of the specified quota file. + */ +static int unnote_qf_name(struct fs_context *fc, int qtype) +{ + struct ext4_fs_context *ctx = fc->fs_private; + + if (ctx->s_qf_names[qtype]) + kfree(ctx->s_qf_names[qtype]); + + ctx->s_qf_names[qtype] = NULL; + ctx->qname_spec |= 1 << qtype; + ctx->spec |= EXT4_SPEC_JQUOTA; + return 0; +} +#endif + +#define EXT4_SET_CTX(name) \ +static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ + unsigned long flag) \ +{ \ + ctx->mask_s_##name |= flag; \ + ctx->vals_s_##name |= flag; \ +} + +#define EXT4_CLEAR_CTX(name) \ +static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ + unsigned long flag) \ +{ \ + ctx->mask_s_##name |= flag; \ + ctx->vals_s_##name &= ~flag; \ +} + +#define EXT4_TEST_CTX(name) \ +static inline unsigned long \ +ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ +{ \ + return (ctx->vals_s_##name & flag); \ +} + +EXT4_SET_CTX(flags); /* set only */ +EXT4_SET_CTX(mount_opt); +EXT4_CLEAR_CTX(mount_opt); +EXT4_TEST_CTX(mount_opt); +EXT4_SET_CTX(mount_opt2); +EXT4_CLEAR_CTX(mount_opt2); +EXT4_TEST_CTX(mount_opt2); + +static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) +{ + set_bit(bit, &ctx->mask_s_mount_flags); + set_bit(bit, &ctx->vals_s_mount_flags); +} + +static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct ext4_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; const struct mount_opts *m; + int is_remount; kuid_t uid; kgid_t gid; - int arg = 0; + int token; -#ifdef CONFIG_QUOTA - if (token == Opt_usrjquota) - return set_qf_name(sb, USRQUOTA, &args[0]); - else if (token == Opt_grpjquota) - return set_qf_name(sb, GRPQUOTA, &args[0]); - else if (token == Opt_offusrjquota) - return clear_qf_name(sb, USRQUOTA); - else if (token == Opt_offgrpjquota) - return clear_qf_name(sb, GRPQUOTA); -#endif - switch (token) { - case Opt_noacl: - case Opt_nouser_xattr: - ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); - break; - case Opt_sb: - return 1; /* handled by get_sb_block() */ - case Opt_removed: - ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); - return 1; - case Opt_abort: - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - return 1; - case Opt_i_version: - sb->s_flags |= SB_I_VERSION; - return 1; - case Opt_lazytime: - sb->s_flags |= SB_LAZYTIME; - return 1; - case Opt_nolazytime: - sb->s_flags &= ~SB_LAZYTIME; - return 1; - case Opt_inlinecrypt: -#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - sb->s_flags |= SB_INLINECRYPT; -#else - ext4_msg(sb, KERN_ERR, "inline encryption not supported"); -#endif - return 1; - } + token = fs_parse(fc, ext4_param_specs, param, &result); + if (token < 0) + return token; + is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; for (m = ext4_mount_opts; m->token != Opt_err; m++) if (token == m->token) break; - if (m->token == Opt_err) { - ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " - "or missing value", opt); - return -1; - } - - if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { - ext4_msg(sb, KERN_ERR, - "Mount option \"%s\" incompatible with ext2", opt); - return -1; - } - if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { - ext4_msg(sb, KERN_ERR, - "Mount option \"%s\" incompatible with ext3", opt); - return -1; - } + ctx->opt_flags |= m->flags; - if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) - return -1; - if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) - return -1; if (m->flags & MOPT_EXPLICIT) { if (m->mount_opt & EXT4_MOUNT_DELALLOC) { - set_opt2(sb, EXPLICIT_DELALLOC); + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC); } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { - set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM); + ctx_set_mount_opt2(ctx, + EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM); } else - return -1; - } - if (m->flags & MOPT_CLEAR_ERR) - clear_opt(sb, ERRORS_MASK); - if (token == Opt_noquota && sb_any_quota_loaded(sb)) { - ext4_msg(sb, KERN_ERR, "Cannot change quota " - "options when quota turned on"); - return -1; + return -EINVAL; } if (m->flags & MOPT_NOSUPPORT) { - ext4_msg(sb, KERN_ERR, "%s option not supported", opt); - } else if (token == Opt_commit) { - if (arg == 0) - arg = JBD2_DEFAULT_MAX_COMMIT_AGE; - else if (arg > INT_MAX / HZ) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "%s option not supported", + param->key); + return 0; + } + + switch (token) { +#ifdef CONFIG_QUOTA + case Opt_usrjquota: + if (!*param->string) + return unnote_qf_name(fc, USRQUOTA); + else + return note_qf_name(fc, USRQUOTA, param); + case Opt_grpjquota: + if (!*param->string) + return unnote_qf_name(fc, GRPQUOTA); + else + return note_qf_name(fc, GRPQUOTA, param); +#endif + case Opt_noacl: + case Opt_nouser_xattr: + ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5"); + break; + case Opt_sb: + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + ext4_msg(NULL, KERN_WARNING, + "Ignoring %s option on remount", param->key); + } else { + ctx->s_sb_block = result.uint_32; + ctx->spec |= EXT4_SPEC_s_sb_block; + } + return 0; + case Opt_removed: + ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option", + param->key); + return 0; + case Opt_abort: + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); + return 0; + case Opt_i_version: + ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); + ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n"); + ctx_set_flags(ctx, SB_I_VERSION); + return 0; + case Opt_inlinecrypt: +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + ctx_set_flags(ctx, SB_INLINECRYPT); +#else + ext4_msg(NULL, KERN_ERR, "inline encryption not supported"); +#endif + return 0; + case Opt_errors: + ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK); + ctx_set_mount_opt(ctx, result.uint_32); + return 0; +#ifdef CONFIG_QUOTA + case Opt_jqfmt: + ctx->s_jquota_fmt = result.uint_32; + ctx->spec |= EXT4_SPEC_JQFMT; + return 0; +#endif + case Opt_data: + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS); + ctx_set_mount_opt(ctx, result.uint_32); + ctx->spec |= EXT4_SPEC_DATAJ; + return 0; + case Opt_commit: + if (result.uint_32 == 0) + ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE; + else if (result.uint_32 > INT_MAX / HZ) { + ext4_msg(NULL, KERN_ERR, "Invalid commit interval %d, " "must be smaller than %d", - arg, INT_MAX / HZ); - return -1; + result.uint_32, INT_MAX / HZ); + return -EINVAL; } - sbi->s_commit_interval = HZ * arg; - } else if (token == Opt_debug_want_extra_isize) { - if ((arg & 1) || - (arg < 4) || - (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) { - ext4_msg(sb, KERN_ERR, - "Invalid want_extra_isize %d", arg); - return -1; + ctx->s_commit_interval = HZ * result.uint_32; + ctx->spec |= EXT4_SPEC_s_commit_interval; + return 0; + case Opt_debug_want_extra_isize: + if ((result.uint_32 & 1) || (result.uint_32 < 4)) { + ext4_msg(NULL, KERN_ERR, + "Invalid want_extra_isize %d", result.uint_32); + return -EINVAL; } - sbi->s_want_extra_isize = arg; - } else if (token == Opt_max_batch_time) { - sbi->s_max_batch_time = arg; - } else if (token == Opt_min_batch_time) { - sbi->s_min_batch_time = arg; - } else if (token == Opt_inode_readahead_blks) { - if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) { - ext4_msg(sb, KERN_ERR, + ctx->s_want_extra_isize = result.uint_32; + ctx->spec |= EXT4_SPEC_s_want_extra_isize; + return 0; + case Opt_max_batch_time: + ctx->s_max_batch_time = result.uint_32; + ctx->spec |= EXT4_SPEC_s_max_batch_time; + return 0; + case Opt_min_batch_time: + ctx->s_min_batch_time = result.uint_32; + ctx->spec |= EXT4_SPEC_s_min_batch_time; + return 0; + case Opt_inode_readahead_blks: + if (result.uint_32 && + (result.uint_32 > (1 << 30) || + !is_power_of_2(result.uint_32))) { + ext4_msg(NULL, KERN_ERR, "EXT4-fs: inode_readahead_blks must be " "0 or a power of 2 smaller than 2^31"); - return -1; + return -EINVAL; } - sbi->s_inode_readahead_blks = arg; - } else if (token == Opt_init_itable) { - set_opt(sb, INIT_INODE_TABLE); - if (!args->from) - arg = EXT4_DEF_LI_WAIT_MULT; - sbi->s_li_wait_mult = arg; - } else if (token == Opt_max_dir_size_kb) { - sbi->s_max_dir_size_kb = arg; + ctx->s_inode_readahead_blks = result.uint_32; + ctx->spec |= EXT4_SPEC_s_inode_readahead_blks; + return 0; + case Opt_init_itable: + ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE); + ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + if (param->type == fs_value_is_string) + ctx->s_li_wait_mult = result.uint_32; + ctx->spec |= EXT4_SPEC_s_li_wait_mult; + return 0; + case Opt_max_dir_size_kb: + ctx->s_max_dir_size_kb = result.uint_32; + ctx->spec |= EXT4_SPEC_s_max_dir_size_kb; + return 0; #ifdef CONFIG_EXT4_DEBUG - } else if (token == Opt_fc_debug_max_replay) { - sbi->s_fc_debug_max_replay = arg; + case Opt_fc_debug_max_replay: + ctx->s_fc_debug_max_replay = result.uint_32; + ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay; + return 0; #endif - } else if (token == Opt_stripe) { - sbi->s_stripe = arg; - } else if (token == Opt_resuid) { - uid = make_kuid(current_user_ns(), arg); + case Opt_stripe: + ctx->s_stripe = result.uint_32; + ctx->spec |= EXT4_SPEC_s_stripe; + return 0; + case Opt_resuid: + uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) { - ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); - return -1; + ext4_msg(NULL, KERN_ERR, "Invalid uid value %d", + result.uint_32); + return -EINVAL; } - sbi->s_resuid = uid; - } else if (token == Opt_resgid) { - gid = make_kgid(current_user_ns(), arg); + ctx->s_resuid = uid; + ctx->spec |= EXT4_SPEC_s_resuid; + return 0; + case Opt_resgid: + gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) { - ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); - return -1; + ext4_msg(NULL, KERN_ERR, "Invalid gid value %d", + result.uint_32); + return -EINVAL; } - sbi->s_resgid = gid; - } else if (token == Opt_journal_dev) { + ctx->s_resgid = gid; + ctx->spec |= EXT4_SPEC_s_resgid; + return 0; + case Opt_journal_dev: if (is_remount) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Cannot specify journal on remount"); - return -1; + return -EINVAL; } - parsed_opts->journal_devnum = arg; - } else if (token == Opt_journal_path) { - char *journal_path; + ctx->journal_devnum = result.uint_32; + ctx->spec |= EXT4_SPEC_JOURNAL_DEV; + return 0; + case Opt_journal_path: + { struct inode *journal_inode; struct path path; int error; if (is_remount) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Cannot specify journal on remount"); - return -1; - } - journal_path = match_strdup(&args[0]); - if (!journal_path) { - ext4_msg(sb, KERN_ERR, "error: could not dup " - "journal device string"); - return -1; + return -EINVAL; } - error = kern_path(journal_path, LOOKUP_FOLLOW, &path); + error = fs_lookup_param(fc, param, 1, &path); if (error) { - ext4_msg(sb, KERN_ERR, "error: could not find " - "journal device path: error %d", error); - kfree(journal_path); - return -1; + ext4_msg(NULL, KERN_ERR, "error: could not find " + "journal device path"); + return -EINVAL; } journal_inode = d_inode(path.dentry); - if (!S_ISBLK(journal_inode->i_mode)) { - ext4_msg(sb, KERN_ERR, "error: journal path %s " - "is not a block device", journal_path); - path_put(&path); - kfree(journal_path); - return -1; - } - - parsed_opts->journal_devnum = new_encode_dev(journal_inode->i_rdev); + ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev); + ctx->spec |= EXT4_SPEC_JOURNAL_DEV; path_put(&path); - kfree(journal_path); - } else if (token == Opt_journal_ioprio) { - if (arg > 7) { - ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" + return 0; + } + case Opt_journal_ioprio: + if (result.uint_32 > 7) { + ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority" " (must be 0-7)"); - return -1; - } - parsed_opts->journal_ioprio = - IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); - } else if (token == Opt_test_dummy_encryption) { - return ext4_set_test_dummy_encryption(sb, opt, &args[0], - is_remount); - } else if (m->flags & MOPT_DATAJ) { - if (is_remount) { - if (!sbi->s_journal) - ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); - else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) { - ext4_msg(sb, KERN_ERR, - "Cannot change data mode on remount"); - return -1; - } - } else { - clear_opt(sb, DATA_FLAGS); - sbi->s_mount_opt |= m->mount_opt; + return -EINVAL; } -#ifdef CONFIG_QUOTA - } else if (m->flags & MOPT_QFMT) { - if (sb_any_quota_loaded(sb) && - sbi->s_jquota_fmt != m->mount_opt) { - ext4_msg(sb, KERN_ERR, "Cannot change journaled " - "quota options when quota turned on"); - return -1; + ctx->journal_ioprio = + IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32); + ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO; + return 0; + case Opt_test_dummy_encryption: +#ifdef CONFIG_FS_ENCRYPTION + if (param->type == fs_value_is_flag) { + ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; + ctx->test_dummy_enc_arg = NULL; + return 0; } - if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_INFO, - "Quota format mount options ignored " - "when QUOTA feature is enabled"); - return 1; + if (*param->string && + !(!strcmp(param->string, "v1") || + !strcmp(param->string, "v2"))) { + ext4_msg(NULL, KERN_WARNING, + "Value of option \"%s\" is unrecognized", + param->key); + return -EINVAL; } - sbi->s_jquota_fmt = m->mount_opt; + ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; + ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size, + GFP_KERNEL); +#else + ext4_msg(NULL, KERN_WARNING, + "Test dummy encryption mount option ignored"); #endif - } else if (token == Opt_dax || token == Opt_dax_always || - token == Opt_dax_inode || token == Opt_dax_never) { + return 0; + case Opt_dax: + case Opt_dax_type: #ifdef CONFIG_FS_DAX - switch (token) { + { + int type = (token == Opt_dax) ? + Opt_dax : result.uint_32; + + switch (type) { case Opt_dax: case Opt_dax_always: - if (is_remount && - (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || - (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) { - fail_dax_change_remount: - ext4_msg(sb, KERN_ERR, "can't change " - "dax mount option while remounting"); - return -1; - } - if (is_remount && - (test_opt(sb, DATA_FLAGS) == - EXT4_MOUNT_JOURNAL_DATA)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and dax"); - return -1; - } - ext4_msg(sb, KERN_WARNING, - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS; - sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER; + ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); break; case Opt_dax_never: - if (is_remount && - (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || - (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) - goto fail_dax_change_remount; - sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER; - sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); break; case Opt_dax_inode: - if (is_remount && - ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || - (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || - !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) - goto fail_dax_change_remount; - sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; - sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER; + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); /* Strictly for printing options */ - sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE; + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE); break; } + return 0; + } #else - ext4_msg(sb, KERN_INFO, "dax option not supported"); - sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER; - sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; - return -1; + ext4_msg(NULL, KERN_INFO, "dax option not supported"); + return -EINVAL; #endif - } else if (token == Opt_data_err_abort) { - sbi->s_mount_opt |= m->mount_opt; - } else if (token == Opt_data_err_ignore) { - sbi->s_mount_opt &= ~m->mount_opt; - } else if (token == Opt_mb_optimize_scan) { - if (arg != 0 && arg != 1) { - ext4_msg(sb, KERN_WARNING, + case Opt_data_err: + if (result.uint_32 == Opt_data_err_abort) + ctx_set_mount_opt(ctx, m->mount_opt); + else if (result.uint_32 == Opt_data_err_ignore) + ctx_clear_mount_opt(ctx, m->mount_opt); + return 0; + case Opt_mb_optimize_scan: + if (result.int_32 == 1) { + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else if (result.int_32 == 0) { + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else { + ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); - return -1; + return -EINVAL; } - parsed_opts->mb_optimize_scan = arg; - } else { - if (!args->from) - arg = 1; + return 0; + } + + /* + * At this point we should only be getting options requiring MOPT_SET, + * or MOPT_CLEAR. Anything else is a bug + */ + if (m->token == Opt_err) { + ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s", + param->key); + WARN_ON(1); + return -EINVAL; + } + + else { + unsigned int set = 0; + + if ((param->type == fs_value_is_flag) || + result.uint_32 > 0) + set = 1; + if (m->flags & MOPT_CLEAR) - arg = !arg; + set = !set; else if (unlikely(!(m->flags & MOPT_SET))) { - ext4_msg(sb, KERN_WARNING, - "buggy handling of option %s", opt); + ext4_msg(NULL, KERN_WARNING, + "buggy handling of option %s", + param->key); WARN_ON(1); - return -1; + return -EINVAL; } if (m->flags & MOPT_2) { - if (arg != 0) - sbi->s_mount_opt2 |= m->mount_opt; + if (set != 0) + ctx_set_mount_opt2(ctx, m->mount_opt); else - sbi->s_mount_opt2 &= ~m->mount_opt; + ctx_clear_mount_opt2(ctx, m->mount_opt); } else { - if (arg != 0) - sbi->s_mount_opt |= m->mount_opt; + if (set != 0) + ctx_set_mount_opt(ctx, m->mount_opt); else - sbi->s_mount_opt &= ~m->mount_opt; + ctx_clear_mount_opt(ctx, m->mount_opt); } } - return 1; + + return 0; } -static int parse_options(char *options, struct super_block *sb, - struct ext4_parsed_options *ret_opts, - int is_remount) +static int parse_options(struct fs_context *fc, char *options) { - struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb); - char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name; - substring_t args[MAX_OPT_ARGS]; - int token; + struct fs_parameter param; + int ret; + char *key; if (!options) - return 1; + return 0; - while ((p = strsep(&options, ",")) != NULL) { - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, tokens, args); - if (handle_mount_opt(sb, p, token, args, ret_opts, - is_remount) < 0) - return 0; + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = strchr(key, '='); + + param.type = fs_value_is_flag; + param.string = NULL; + + if (value) { + if (value == key) + continue; + + *value++ = 0; + v_len = strlen(value); + param.string = kmemdup_nul(value, v_len, + GFP_KERNEL); + if (!param.string) + return -ENOMEM; + param.type = fs_value_is_string; + } + + param.key = key; + param.size = v_len; + + ret = ext4_parse_param(fc, ¶m); + if (param.string) + kfree(param.string); + if (ret < 0) + return ret; + } + } + + ret = ext4_validate_options(fc); + if (ret < 0) + return ret; + + return 0; +} + +static int parse_apply_sb_mount_options(struct super_block *sb, + struct ext4_fs_context *m_ctx) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *s_mount_opts = NULL; + struct ext4_fs_context *s_ctx = NULL; + struct fs_context *fc = NULL; + int ret = -ENOMEM; + + if (!sbi->s_es->s_mount_opts[0]) + return 0; + + s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, + sizeof(sbi->s_es->s_mount_opts), + GFP_KERNEL); + if (!s_mount_opts) + return ret; + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + goto out_free; + + s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL); + if (!s_ctx) + goto out_free; + + fc->fs_private = s_ctx; + fc->s_fs_info = sbi; + + ret = parse_options(fc, s_mount_opts); + if (ret < 0) + goto parse_failed; + + ret = ext4_check_opt_consistency(fc, sb); + if (ret < 0) { +parse_failed: + ext4_msg(sb, KERN_WARNING, + "failed to parse options in superblock: %s", + s_mount_opts); + ret = 0; + goto out_free; + } + + if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV) + m_ctx->journal_devnum = s_ctx->journal_devnum; + if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO) + m_ctx->journal_ioprio = s_ctx->journal_ioprio; + + ret = ext4_apply_options(fc, sb); + +out_free: + kfree(s_ctx); + kfree(fc); + kfree(s_mount_opts); + return ret; +} + +static void ext4_apply_quota_options(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + bool quota_feature = ext4_has_feature_quota(sb); + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *qname; + int i; + + if (quota_feature) + return; + + if (ctx->spec & EXT4_SPEC_JQUOTA) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + if (!(ctx->qname_spec & (1 << i))) + continue; + + qname = ctx->s_qf_names[i]; /* May be NULL */ + if (qname) + set_opt(sb, QUOTA); + ctx->s_qf_names[i] = NULL; + qname = rcu_replace_pointer(sbi->s_qf_names[i], qname, + lockdep_is_held(&sb->s_umount)); + if (qname) + kfree_rcu(qname); + } } + + if (ctx->spec & EXT4_SPEC_JQFMT) + sbi->s_jquota_fmt = ctx->s_jquota_fmt; +#endif +} + +/* + * Check quota settings consistency. + */ +static int ext4_check_quota_consistency(struct fs_context *fc, + struct super_block *sb) +{ #ifdef CONFIG_QUOTA + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + bool quota_feature = ext4_has_feature_quota(sb); + bool quota_loaded = sb_any_quota_loaded(sb); + bool usr_qf_name, grp_qf_name, usrquota, grpquota; + int quota_flags, i; + /* * We do the test below only for project quotas. 'usrquota' and * 'grpquota' mount options are allowed even without quota feature * to support legacy quotas in quota files. */ - if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) { - ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. " + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) && + !ext4_has_feature_project(sb)) { + ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. " "Cannot enable project quota enforcement."); - return 0; + return -EINVAL; } - usr_qf_name = get_qf_name(sb, sbi, USRQUOTA); - grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA); - if (usr_qf_name || grp_qf_name) { - if (test_opt(sb, USRQUOTA) && usr_qf_name) - clear_opt(sb, USRQUOTA); - if (test_opt(sb, GRPQUOTA) && grp_qf_name) - clear_opt(sb, GRPQUOTA); + quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | + EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA; + if (quota_loaded && + ctx->mask_s_mount_opt & quota_flags && + !ctx_test_mount_opt(ctx, quota_flags)) + goto err_quota_change; + + if (ctx->spec & EXT4_SPEC_JQUOTA) { - if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { - ext4_msg(sb, KERN_ERR, "old and new quota " - "format mixing"); + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + if (!(ctx->qname_spec & (1 << i))) + continue; + + if (quota_loaded && + !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i]) + goto err_jquota_change; + + if (sbi->s_qf_names[i] && ctx->s_qf_names[i] && + strcmp(get_qf_name(sb, sbi, i), + ctx->s_qf_names[i]) != 0) + goto err_jquota_specified; + } + + if (quota_feature) { + ext4_msg(NULL, KERN_INFO, + "Journaled quota options ignored when " + "QUOTA feature is enabled"); return 0; } + } - if (!sbi->s_jquota_fmt) { - ext4_msg(sb, KERN_ERR, "journaled quota format " - "not specified"); + if (ctx->spec & EXT4_SPEC_JQFMT) { + if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded) + goto err_jquota_change; + if (quota_feature) { + ext4_msg(NULL, KERN_INFO, "Quota format mount options " + "ignored when QUOTA feature is enabled"); return 0; } } + + /* Make sure we don't mix old and new quota format */ + usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) || + ctx->s_qf_names[USRQUOTA]); + grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) || + ctx->s_qf_names[GRPQUOTA]); + + usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) || + test_opt(sb, USRQUOTA)); + + grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) || + test_opt(sb, GRPQUOTA)); + + if (usr_qf_name) { + ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA); + usrquota = false; + } + if (grp_qf_name) { + ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA); + grpquota = false; + } + + if (usr_qf_name || grp_qf_name) { + if (usrquota || grpquota) { + ext4_msg(NULL, KERN_ERR, "old and new quota " + "format mixing"); + return -EINVAL; + } + + if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) { + ext4_msg(NULL, KERN_ERR, "journaled quota format " + "not specified"); + return -EINVAL; + } + } + + return 0; + +err_quota_change: + ext4_msg(NULL, KERN_ERR, + "Cannot change quota options when quota turned on"); + return -EINVAL; +err_jquota_change: + ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota " + "options when quota turned on"); + return -EINVAL; +err_jquota_specified: + ext4_msg(NULL, KERN_ERR, "%s quota file already specified", + QTYPE2NAME(i)); + return -EINVAL; +#else + return 0; #endif - if (test_opt(sb, DIOREAD_NOLOCK)) { +} + +static int ext4_check_opt_consistency(struct fs_context *fc, + struct super_block *sb) +{ + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = fc->s_fs_info; + int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + + if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { + ext4_msg(NULL, KERN_ERR, + "Mount option(s) incompatible with ext2"); + return -EINVAL; + } + if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { + ext4_msg(NULL, KERN_ERR, + "Mount option(s) incompatible with ext3"); + return -EINVAL; + } + + if (ctx->s_want_extra_isize > + (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) { + ext4_msg(NULL, KERN_ERR, + "Invalid want_extra_isize %d", + ctx->s_want_extra_isize); + return -EINVAL; + } + + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) { int blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (blocksize < PAGE_SIZE) - ext4_msg(sb, KERN_WARNING, "Warning: mounting with an " + ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an " "experimental mount option 'dioread_nolock' " "for blocksize < PAGE_SIZE"); } + +#ifdef CONFIG_FS_ENCRYPTION + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. + */ + if ((ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) && + is_remount && !sbi->s_dummy_enc_policy.policy) { + ext4_msg(NULL, KERN_WARNING, + "Can't set test_dummy_encryption on remount"); + return -1; + } +#endif + + if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) { + if (!sbi->s_journal) { + ext4_msg(NULL, KERN_WARNING, + "Remounting file system with no journal " + "so ignoring journalled data option"); + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS); + } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) != + test_opt(sb, DATA_FLAGS)) { + ext4_msg(NULL, KERN_ERR, "Cannot change data mode " + "on remount"); + return -EINVAL; + } + } + + if (is_remount) { + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) && + (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { + ext4_msg(NULL, KERN_ERR, "can't mount with " + "both data=journal and dax"); + return -EINVAL; + } + + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) && + (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || + (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) { +fail_dax_change_remount: + ext4_msg(NULL, KERN_ERR, "can't change " + "dax mount option while remounting"); + return -EINVAL; + } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) && + (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || + (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) { + goto fail_dax_change_remount; + } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) && + ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || + (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || + !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) { + goto fail_dax_change_remount; + } + } + + return ext4_check_quota_consistency(fc, sb); +} + +static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) +{ + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = fc->s_fs_info; + int ret = 0; + + sbi->s_mount_opt &= ~ctx->mask_s_mount_opt; + sbi->s_mount_opt |= ctx->vals_s_mount_opt; + sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2; + sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2; + sbi->s_mount_flags &= ~ctx->mask_s_mount_flags; + sbi->s_mount_flags |= ctx->vals_s_mount_flags; + sb->s_flags &= ~ctx->mask_s_flags; + sb->s_flags |= ctx->vals_s_flags; + + /* + * i_version differs from common mount option iversion so we have + * to let vfs know that it was set, otherwise it would get cleared + * on remount + */ + if (ctx->mask_s_flags & SB_I_VERSION) + fc->sb_flags |= SB_I_VERSION; + +#define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; }) + APPLY(s_commit_interval); + APPLY(s_stripe); + APPLY(s_max_batch_time); + APPLY(s_min_batch_time); + APPLY(s_want_extra_isize); + APPLY(s_inode_readahead_blks); + APPLY(s_max_dir_size_kb); + APPLY(s_li_wait_mult); + APPLY(s_resgid); + APPLY(s_resuid); + +#ifdef CONFIG_EXT4_DEBUG + APPLY(s_fc_debug_max_replay); +#endif + + ext4_apply_quota_options(fc, sb); + + if (ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) + ret = ext4_set_test_dummy_encryption(sb, ctx->test_dummy_enc_arg); + + return ret; +} + + +static int ext4_validate_options(struct fs_context *fc) +{ +#ifdef CONFIG_QUOTA + struct ext4_fs_context *ctx = fc->fs_private; + char *usr_qf_name, *grp_qf_name; + + usr_qf_name = ctx->s_qf_names[USRQUOTA]; + grp_qf_name = ctx->s_qf_names[GRPQUOTA]; + + if (usr_qf_name || grp_qf_name) { + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name) + ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA); + + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name) + ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA); + + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) || + ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) { + ext4_msg(NULL, KERN_ERR, "old and new quota " + "format mixing"); + return -EINVAL; + } + } +#endif return 1; } @@ -2533,12 +2987,12 @@ static inline void ext4_show_quota_options(struct seq_file *seq, static const char *token2str(int token) { - const struct match_token *t; + const struct fs_parameter_spec *spec; - for (t = tokens; t->token != Opt_err; t++) - if (t->token == token && !strchr(t->pattern, '=')) + for (spec = ext4_param_specs; spec->name != NULL; spec++) + if (spec->opt == token && !spec->type) break; - return t->pattern; + return spec->name; } /* @@ -2564,7 +3018,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, for (m = ext4_mount_opts; m->token != Opt_err; m++) { int want_set = m->flags & MOPT_SET; if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || - (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP) + m->flags & MOPT_SKIP) continue; if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) continue; /* skip if same as the default */ @@ -2712,8 +3166,6 @@ done: EXT4_BLOCKS_PER_GROUP(sb), EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt, sbi->s_mount_opt2); - - cleancache_init_fs(sb); return err; } @@ -3034,8 +3486,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) */ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) { - unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS; + loff_t upper_limit, res = EXT4_NDIR_BLOCKS; int meta_blocks; + unsigned int ppb = 1 << (bits - 2); /* * This is calculated to be the largest file size for a dense, block @@ -3067,27 +3520,42 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) } + /* Compute how many blocks we can address by block tree */ + res += ppb; + res += ppb * ppb; + res += ((loff_t)ppb) * ppb * ppb; + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; + /* Does block tree limit file size? */ + if (res + meta_blocks <= upper_limit) + goto check_lfs; + + res = upper_limit; + /* How many metadata blocks are needed for addressing upper_limit? */ + upper_limit -= EXT4_NDIR_BLOCKS; /* indirect blocks */ meta_blocks = 1; + upper_limit -= ppb; /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; - - res += 1LL << (bits-2); - res += 1LL << (2*(bits-2)); - res += 1LL << (3*(bits-2)); + if (upper_limit < ppb * ppb) { + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb); + res -= meta_blocks; + goto check_lfs; + } + meta_blocks += 1 + ppb; + upper_limit -= ppb * ppb; + /* tripple indirect blocks for the rest */ + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) + + DIV_ROUND_UP_ULL(upper_limit, ppb*ppb); + res -= meta_blocks; +check_lfs: res <<= bits; - if (res > upper_limit) - res = upper_limit; - if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; - return (loff_t)res; + return res; } static ext4_fsblk_t descriptor_loc(struct super_block *sb, @@ -3172,7 +3640,7 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_UNICODE +#if !IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb)) { ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " @@ -3876,21 +4344,52 @@ static void ext4_setup_csum_trigger(struct super_block *sb, sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger; } -static int ext4_fill_super(struct super_block *sb, void *data, int silent) +static void ext4_free_sbi(struct ext4_sb_info *sbi) +{ + if (!sbi) + return; + + kfree(sbi->s_blockgroup_lock); + fs_put_dax(sbi->s_daxdev); + kfree(sbi); +} + +static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb) +{ + struct ext4_sb_info *sbi; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return NULL; + + sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off); + + sbi->s_blockgroup_lock = + kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); + + if (!sbi->s_blockgroup_lock) + goto err_out; + + sb->s_fs_info = sbi; + sbi->s_sb = sb; + return sbi; +err_out: + fs_put_dax(sbi->s_daxdev); + kfree(sbi); + return NULL; +} + +static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { - struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); - char *orig_data = kstrdup(data, GFP_KERNEL); struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; - struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct ext4_sb_info *sbi = EXT4_SB(sb); struct flex_groups **flex_groups; ext4_fsblk_t block; - ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; unsigned long offset = 0; unsigned long def_mount_opts; struct inode *root; - const char *descr; int ret = -ENOMEM; int blocksize, clustersize; unsigned int db_count; @@ -3899,32 +4398,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) __u64 blocks_count; int err = 0; ext4_group_t first_not_zeroed; - struct ext4_parsed_options parsed_opts; + struct ext4_fs_context *ctx = fc->fs_private; + int silent = fc->sb_flags & SB_SILENT; /* Set defaults for the variables that will be set during parsing */ - parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - parsed_opts.journal_devnum = 0; - parsed_opts.mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - if ((data && !orig_data) || !sbi) - goto out_free_base; - - sbi->s_daxdev = dax_dev; - sbi->s_blockgroup_lock = - kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); - if (!sbi->s_blockgroup_lock) - goto out_free_base; - - sb->s_fs_info = sbi; - sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; - sbi->s_sb_block = sb_block; sbi->s_sectors_written_start = part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); - /* Cleanup superblock name */ - strreplace(sb->s_id, '/', '!'); - /* -EINVAL is default */ ret = -EINVAL; blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); @@ -3938,10 +4421,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * block sizes. We need to calculate the offset from buffer start. */ if (blocksize != EXT4_MIN_BLOCK_SIZE) { - logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); } else { - logical_sb_block = sb_block; + logical_sb_block = sbi->s_sb_block; } bh = ext4_sb_bread_unmovable(sb, logical_sb_block); @@ -4146,31 +4629,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } - if (sbi->s_es->s_mount_opts[0]) { - char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, - sizeof(sbi->s_es->s_mount_opts), - GFP_KERNEL); - if (!s_mount_opts) - goto failed_mount; - if (!parse_options(s_mount_opts, sb, &parsed_opts, 0)) { - ext4_msg(sb, KERN_WARNING, - "failed to parse options in superblock: %s", - s_mount_opts); - } - kfree(s_mount_opts); - } + err = parse_apply_sb_mount_options(sb, ctx); + if (err < 0) + goto failed_mount; + sbi->s_def_mount_opt = sbi->s_mount_opt; - if (!parse_options((char *) data, sb, &parsed_opts, 0)) + + err = ext4_check_opt_consistency(fc, sb); + if (err < 0) goto failed_mount; -#ifdef CONFIG_UNICODE + err = ext4_apply_options(fc, sb); + if (err < 0) + goto failed_mount; + +#if IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; - __u16 encoding_flags; + __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags); - if (ext4_sb_read_encoding(es, &encoding_info, - &encoding_flags)) { + encoding_info = ext4_sb_read_encoding(es); + if (!encoding_info) { ext4_msg(sb, KERN_ERR, "Encoding requested by superblock is unknown"); goto failed_mount; @@ -4179,15 +4659,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) encoding = utf8_load(encoding_info->version); if (IS_ERR(encoding)) { ext4_msg(sb, KERN_ERR, - "can't mount with superblock charset: %s-%s " + "can't mount with superblock charset: %s-%u.%u.%u " "not supported by the kernel. flags: 0x%x.", - encoding_info->name, encoding_info->version, + encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), encoding_flags); goto failed_mount; } ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " - "%s-%s with flags 0x%hx", encoding_info->name, - encoding_info->version?:"\b", encoding_flags); + "%s-%u.%u.%u with flags 0x%hx", encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), + encoding_flags); sb->s_encoding = encoding; sb->s_encoding_flags = encoding_flags; @@ -4299,9 +4785,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0, - bdev_nr_sectors(sb->s_bdev))) - set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + if (sbi->s_daxdev) { + if (blocksize == PAGE_SIZE) + set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + else + ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); + } if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) { if (ext4_has_feature_inline_data(sb)) { @@ -4337,7 +4826,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); bh = ext4_sb_bread_unmovable(sb, logical_sb_block); if (IS_ERR(bh)) { @@ -4620,14 +5109,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Initialize fast commit stuff */ atomic_set(&sbi->s_fc_subtid, 0); - atomic_set(&sbi->s_fc_ineligible_updates, 0); INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); sbi->s_fc_bytes = 0; ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); + sbi->s_fc_ineligible_tid = 0; spin_lock_init(&sbi->s_fc_lock); memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); sbi->s_fc_replay_state.fc_regions = NULL; @@ -4653,7 +5141,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { - err = ext4_load_journal(sb, es, parsed_opts.journal_devnum); + err = ext4_load_journal(sb, es, ctx->journal_devnum); if (err) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && @@ -4753,7 +5241,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount_wq; } - set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio); + set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); sbi->s_journal->j_submit_inode_data_buffers = ext4_journal_submit_inode_data_buffers; @@ -4865,12 +5353,12 @@ no_journal: * turned off by passing "mb_optimize_scan=0". This can also be * turned on forcefully by passing "mb_optimize_scan=1". */ - if (parsed_opts.mb_optimize_scan == 1) - set_opt2(sb, MB_OPTIMIZE_SCAN); - else if (parsed_opts.mb_optimize_scan == 0) - clear_opt2(sb, MB_OPTIMIZE_SCAN); - else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) - set_opt2(sb, MB_OPTIMIZE_SCAN); + if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) { + if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) + set_opt2(sb, MB_OPTIMIZE_SCAN); + else + clear_opt2(sb, MB_OPTIMIZE_SCAN); + } err = ext4_mb_init(sb); if (err) { @@ -4969,15 +5457,6 @@ no_journal: if (err) goto failed_mount9; } - if (EXT4_SB(sb)->s_journal) { - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - descr = " journalled data mode"; - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - descr = " ordered data mode"; - else - descr = " writeback data mode"; - } else - descr = "out journal"; if (test_opt(sb, DISCARD)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); @@ -4987,14 +5466,6 @@ no_journal: "the device does not support discard"); } - if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) - ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %.*s%s%s. Quota mode: %s.", descr, - (int) sizeof(sbi->s_es->s_mount_opts), - sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data, - ext4_quota_mode(sb)); - if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -5005,7 +5476,6 @@ no_journal: atomic_set(&sbi->s_warning_count, 0); atomic_set(&sbi->s_msg_count, 0); - kfree(orig_data); return 0; cantfind_ext4: @@ -5077,7 +5547,7 @@ failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -5091,14 +5561,60 @@ failed_mount: ext4_blkdev_remove(sbi); out_fail: sb->s_fs_info = NULL; - kfree(sbi->s_blockgroup_lock); -out_free_base: - kfree(sbi); - kfree(orig_data); - fs_put_dax(dax_dev); return err ? err : ret; } +static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi; + const char *descr; + int ret; + + sbi = ext4_alloc_sbi(sb); + if (!sbi) + return -ENOMEM; + + fc->s_fs_info = sbi; + + /* Cleanup superblock name */ + strreplace(sb->s_id, '/', '!'); + + sbi->s_sb_block = 1; /* Default super block location */ + if (ctx->spec & EXT4_SPEC_s_sb_block) + sbi->s_sb_block = ctx->s_sb_block; + + ret = __ext4_fill_super(fc, sb); + if (ret < 0) + goto free_sbi; + + if (sbi->s_journal) { + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + descr = " journalled data mode"; + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + descr = " ordered data mode"; + else + descr = " writeback data mode"; + } else + descr = "out journal"; + + if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " + "Quota mode: %s.", descr, ext4_quota_mode(sb)); + + return 0; + +free_sbi: + ext4_free_sbi(sbi); + fc->s_fs_info = NULL; + return ret; +} + +static int ext4_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, ext4_fill_super); +} + /* * Setup any per-fs journal parameters now. We'll do this both on * initial mount, once the journal has been initialised but before we've @@ -5727,11 +6243,12 @@ struct ext4_mount_options { #endif }; -static int ext4_remount(struct super_block *sb, int *flags, char *data) +static int __ext4_remount(struct fs_context *fc, struct super_block *sb) { + struct ext4_fs_context *ctx = fc->fs_private; struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned long old_sb_flags, vfs_flags; + unsigned long old_sb_flags; struct ext4_mount_options old_opts; ext4_group_t g; int err = 0; @@ -5740,14 +6257,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int i, j; char *to_free[EXT4_MAXQUOTAS]; #endif - char *orig_data = kstrdup(data, GFP_KERNEL); - struct ext4_parsed_options parsed_opts; - parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - parsed_opts.journal_devnum = 0; - - if (data && !orig_data) - return -ENOMEM; + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; /* Store the original options */ old_sb_flags = sb->s_flags; @@ -5768,28 +6279,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!old_opts.s_qf_names[i]) { for (j = 0; j < i; j++) kfree(old_opts.s_qf_names[j]); - kfree(orig_data); return -ENOMEM; } } else old_opts.s_qf_names[i] = NULL; #endif if (sbi->s_journal && sbi->s_journal->j_task->io_context) - parsed_opts.journal_ioprio = + ctx->journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; - /* - * Some options can be enabled by ext4 and/or by VFS mount flag - * either way we need to make sure it matches in both *flags and - * s_flags. Copy those selected flags from *flags to s_flags - */ - vfs_flags = SB_LAZYTIME | SB_I_VERSION; - sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); - - if (!parse_options(data, sb, &parsed_opts, 1)) { - err = -EINVAL; - goto restore_opts; - } + ext4_apply_options(fc, sb); if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { @@ -5836,19 +6335,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) { ext4_init_journal_params(sb, sbi->s_journal); - set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio); + set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); } /* Flush outstanding errors before changing fs state */ flush_work(&sbi->s_error_work); - if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { + if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { err = -EROFS; goto restore_opts; } - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { err = sync_filesystem(sb); if (err < 0) goto restore_opts; @@ -5996,16 +6495,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) ext4_stop_mmpd(sbi); - /* - * Some options can be enabled by ext4 and/or by VFS mount flag - * either way we need to make sure it matches in both *flags and - * s_flags. Copy those selected flags from s_flags to *flags - */ - *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); - - ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", - orig_data, ext4_quota_mode(sb)); - kfree(orig_data); return 0; restore_opts: @@ -6031,10 +6520,30 @@ restore_opts: #endif if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) ext4_stop_mmpd(sbi); - kfree(orig_data); return err; } +static int ext4_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + int ret; + + fc->s_fs_info = EXT4_SB(sb); + + ret = ext4_check_opt_consistency(fc, sb); + if (ret < 0) + return ret; + + ret = __ext4_remount(fc, sb); + if (ret < 0) + return ret; + + ext4_msg(sb, KERN_INFO, "re-mounted. Quota mode: %s.", + ext4_quota_mode(sb)); + + return 0; +} + #ifdef CONFIG_QUOTA static int ext4_statfs_project(struct super_block *sb, kprojid_t projid, struct kstatfs *buf) @@ -6275,10 +6784,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) { - lockdep_set_quota_inode(path->dentry->d_inode, - I_DATA_SEM_NORMAL); - } else { + if (!err) { struct inode *inode = d_inode(path->dentry); handle_t *handle; @@ -6298,7 +6804,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); + if (err) + dquot_quota_off(sb, type); } + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); return err; } @@ -6361,8 +6872,19 @@ int ext4_enable_quotas(struct super_block *sb) "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); - for (type--; type >= 0; type--) + for (type--; type >= 0; type--) { + struct inode *inode; + + inode = sb_dqopt(sb)->files[type]; + if (inode) + inode = igrab(inode); dquot_quota_off(sb, type); + if (inode) { + lockdep_set_quota_inode(inode, + I_DATA_SEM_NORMAL); + iput(inode); + } + } return err; } @@ -6466,7 +6988,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - if (EXT4_SB(sb)->s_journal && !handle) { + if (!handle) { ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); @@ -6517,12 +7039,6 @@ out: } #endif -static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); -} - #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static inline void register_as_ext2(void) { @@ -6580,11 +7096,12 @@ static inline int ext3_feature_set_ok(struct super_block *sb) } static struct file_system_type ext4_fs_type = { - .owner = THIS_MODULE, - .name = "ext4", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, + .owner = THIS_MODULE, + .name = "ext4", + .init_fs_context = ext4_init_fs_context, + .parameters = ext4_param_specs, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ext4"); @@ -6649,6 +7166,7 @@ static int __init ext4_init_fs(void) out: unregister_as_ext2(); unregister_as_ext3(); + ext4_fc_destroy_dentry_cache(); out05: destroy_inodecache(); out1: @@ -6675,6 +7193,7 @@ static void __exit ext4_exit_fs(void) unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); + ext4_fc_destroy_dentry_cache(); destroy_inodecache(); ext4_exit_mballoc(); ext4_exit_sysfs(); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 2314f7446592..d233c24ea342 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -63,7 +63,7 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - return snprintf(buf, PAGE_SIZE, "%lu\n", + return sysfs_emit(buf, "%lu\n", (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - sbi->s_sectors_written_start) >> 1); } @@ -72,7 +72,7 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1))); @@ -130,8 +130,8 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi, static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf) { if (!sbi->s_journal) - return snprintf(buf, PAGE_SIZE, "<none>\n"); - return snprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "<none>\n"); + return sysfs_emit(buf, "%d\n", task_pid_vnr(sbi->s_journal->j_task)); } @@ -245,6 +245,7 @@ EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(journal_task, 0444, journal_task); EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch); EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit); +EXT4_RW_ATTR_SBI_UL(last_trim_minblks, s_last_trim_minblks); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -295,6 +296,7 @@ static struct attribute *ext4_attrs[] = { #endif ATTR_LIST(mb_prefetch), ATTR_LIST(mb_prefetch_limit), + ATTR_LIST(last_trim_minblks), NULL, }; ATTRIBUTE_GROUPS(ext4); @@ -307,7 +309,7 @@ EXT4_ATTR_FEATURE(meta_bg_resize); EXT4_ATTR_FEATURE(encryption); EXT4_ATTR_FEATURE(test_dummy_encryption_v2); #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) EXT4_ATTR_FEATURE(casefold); #endif #ifdef CONFIG_FS_VERITY @@ -315,7 +317,7 @@ EXT4_ATTR_FEATURE(verity); #endif EXT4_ATTR_FEATURE(metadata_csum_seed); EXT4_ATTR_FEATURE(fast_commit); -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) EXT4_ATTR_FEATURE(encrypted_casefold); #endif @@ -327,7 +329,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(casefold), #endif #ifdef CONFIG_FS_VERITY @@ -335,7 +337,7 @@ static struct attribute *ext4_feat_attrs[] = { #endif ATTR_LIST(metadata_csum_seed), ATTR_LIST(fast_commit), -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) ATTR_LIST(encrypted_casefold), #endif NULL, @@ -357,7 +359,7 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi) { - return snprintf(buf, PAGE_SIZE, "%lld\n", + return sysfs_emit(buf, "%lld\n", ((time64_t)hi << 32) + le32_to_cpu(lo)); } @@ -374,7 +376,7 @@ static ssize_t ext4_attr_show(struct kobject *kobj, switch (a->attr_id) { case attr_delayed_allocation_blocks: - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (s64) EXT4_C2B(sbi, percpu_counter_sum(&sbi->s_dirtyclusters_counter))); case attr_session_write_kbytes: @@ -382,11 +384,11 @@ static ssize_t ext4_attr_show(struct kobject *kobj, case attr_lifetime_write_kbytes: return lifetime_write_kbytes_show(sbi, buf); case attr_reserved_clusters: - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); case attr_sra_exceeded_retry_limit: - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead: @@ -394,42 +396,42 @@ static ssize_t ext4_attr_show(struct kobject *kobj, if (!ptr) return 0; if (a->attr_ptr == ptr_ext4_super_block_offset) - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr)); else - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", *((unsigned int *) ptr)); case attr_pointer_ul: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%lu\n", + return sysfs_emit(buf, "%lu\n", *((unsigned long *) ptr)); case attr_pointer_u8: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", *((unsigned char *) ptr)); case attr_pointer_u64: if (!ptr) return 0; if (a->attr_ptr == ptr_ext4_super_block_offset) - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", le64_to_cpup(ptr)); else - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", *((unsigned long long *) ptr)); case attr_pointer_string: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%.*s\n", a->attr_size, + return sysfs_emit(buf, "%.*s\n", a->attr_size, (char *) ptr); case attr_pointer_atomic: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "%d\n", atomic_read((atomic_t *) ptr)); case attr_feature: - return snprintf(buf, PAGE_SIZE, "supported\n"); + return sysfs_emit(buf, "supported\n"); case attr_first_error_time: return print_tstamp(buf, sbi->s_es, s_first_error_time); case attr_last_error_time: diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1e0fc1ed845b..042325349098 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2408,7 +2408,7 @@ retry_inode: if (IS_SYNC(inode)) ext4_handle_sync(handle); } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); cleanup: brelse(is.iloc.bh); @@ -2486,7 +2486,7 @@ retry: if (error == 0) error = error2; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL); return error; } @@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, error); goto cleanup; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); } error = 0; cleanup: |