From 63b1e9bccb71fe7d7e3ddc9877dbdc85e5d2d023 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 26 Oct 2022 12:23:09 +0800 Subject: ext4: add EXT4_IGET_BAD flag to prevent unexpected bad inode There are many places that will get unhappy (and crash) when ext4_iget() returns a bad inode. However, if iget the boot loader inode, allows a bad inode to be returned, because the inode may not be initialized. This mechanism can be used to bypass some checks and cause panic. To solve this problem, we add a special iget flag EXT4_IGET_BAD. Only with this flag we'd be returning bad inode from ext4_iget(), otherwise we always return the error code if the inode is bad inode.(suggested by Jan Kara) Signed-off-by: Baokun Li Reviewed-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221026042310.3839669-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8d5453852f98..2b574b143bde 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2964,7 +2964,8 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode, typedef enum { EXT4_IGET_NORMAL = 0, EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */ - EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */ + EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */ + EXT4_IGET_BAD = 0x0004 /* Allow to iget a bad inode */ } ext4_iget_flags; extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, -- cgit v1.2.3 From 3bf678a0f9c017c9ba7c581541dbc8453452a7ae Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 13:58:33 +0800 Subject: ext4: fix undefined behavior in bit shift for ext4_check_flag_values Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in fs/ext4/ext4.h:591:2 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c ext4_init_fs+0x5a/0x277 do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Fixes: 9a4c80194713 ("ext4: ensure Inode flags consistency are checked at build time") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221031055833.3966222-1-cuigaosheng1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2b574b143bde..3afdd99bb214 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -558,7 +558,7 @@ enum { * * It's not paranoia if the Murphy's Law really *is* out to get you. :-) */ -#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) +#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG)) #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG)) static inline void ext4_check_flag_values(void) -- cgit v1.2.3 From 4c0d5778385cb3618ff26a561ce41de2b7d9de70 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Nov 2022 14:48:36 -0800 Subject: ext4: don't set up encryption key during jbd2 transaction Commit a80f7fcf1867 ("ext4: fixup ext4_fc_track_* functions' signature") extended the scope of the transaction in ext4_unlink() too far, making it include the call to ext4_find_entry(). However, ext4_find_entry() can deadlock when called from within a transaction because it may need to set up the directory's encryption key. Fix this by restoring the transaction to its original scope. Reported-by: syzbot+1a748d0007eeac3ab079@syzkaller.appspotmail.com Fixes: a80f7fcf1867 ("ext4: fixup ext4_fc_track_* functions' signature") Cc: # v5.10+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221106224841.279231-3-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 ++-- fs/ext4/fast_commit.c | 2 +- fs/ext4/namei.c | 44 ++++++++++++++++++++++++-------------------- 3 files changed, 27 insertions(+), 23 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3afdd99bb214..4e739902dc03 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3620,8 +3620,8 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh, unsigned int blocksize); extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, struct buffer_head *bh); -extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, - struct inode *inode); +extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name, + struct inode *inode, struct dentry *dentry); extern int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6d98f2b39b77..da0c8228cf9c 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1397,7 +1397,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl, return 0; } - ret = __ext4_unlink(NULL, old_parent, &entry, inode); + ret = __ext4_unlink(old_parent, &entry, inode, NULL); /* -ENOENT ok coz it might not exist anymore. */ if (ret == -ENOENT) ret = 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index c08c0aba1883..a789ea9b61a0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3204,14 +3204,20 @@ end_rmdir: return retval; } -int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, - struct inode *inode) +int __ext4_unlink(struct inode *dir, const struct qstr *d_name, + struct inode *inode, + struct dentry *dentry /* NULL during fast_commit recovery */) { int retval = -ENOENT; struct buffer_head *bh; struct ext4_dir_entry_2 *de; + handle_t *handle; int skip_remove_dentry = 0; + /* + * Keep this outside the transaction; it may have to set up the + * directory's encryption key, which isn't GFP_NOFS-safe. + */ bh = ext4_find_entry(dir, d_name, &de, NULL); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -3228,7 +3234,14 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) skip_remove_dentry = 1; else - goto out; + goto out_bh; + } + + handle = ext4_journal_start(dir, EXT4_HT_DIR, + EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) { + retval = PTR_ERR(handle); + goto out_bh; } if (IS_DIRSYNC(dir)) @@ -3237,12 +3250,12 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name if (!skip_remove_dentry) { retval = ext4_delete_entry(handle, dir, de, bh); if (retval) - goto out; + goto out_handle; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); retval = ext4_mark_inode_dirty(handle, dir); if (retval) - goto out; + goto out_handle; } else { retval = 0; } @@ -3255,15 +3268,17 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name ext4_orphan_add(handle, inode); inode->i_ctime = current_time(inode); retval = ext4_mark_inode_dirty(handle, inode); - -out: + if (dentry && !retval) + ext4_fc_track_unlink(handle, dentry); +out_handle: + ext4_journal_stop(handle); +out_bh: brelse(bh); return retval; } static int ext4_unlink(struct inode *dir, struct dentry *dentry) { - handle_t *handle; int retval; if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) @@ -3281,16 +3296,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (retval) goto out_trace; - handle = ext4_journal_start(dir, EXT4_HT_DIR, - EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) { - retval = PTR_ERR(handle); - goto out_trace; - } - - retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); - if (!retval) - ext4_fc_track_unlink(handle, dentry); + retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry); #if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid @@ -3301,8 +3307,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_CASEFOLDED(dir)) d_invalidate(dentry); #endif - if (handle) - ext4_journal_stop(handle); out_trace: trace_ext4_unlink_exit(dentry, retval); -- cgit v1.2.3 From dff4ac75eeeefc4397fe7cf8ce559425bf46b1f7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 7 Dec 2022 12:27:05 +0100 Subject: ext4: move keep_towrite handling to ext4_bio_write_page() When we are writing back page but we cannot for some reason write all its buffers (e.g. because we cannot allocate blocks in current context) we have to keep TOWRITE tag set in the mapping as otherwise racing WB_SYNC_ALL writeback that could write these buffers can skip the page and result in data loss. We will need this logic for writeback during transaction commit so move the logic from ext4_writepage() to ext4_bio_write_page(). Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20221207112722.22220-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 +-- fs/ext4/inode.c | 6 ++---- fs/ext4/page-io.c | 36 +++++++++++++++++++++--------------- 3 files changed, 24 insertions(+), 21 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4e739902dc03..0540929630a1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3757,8 +3757,7 @@ extern void ext4_end_io_rsv_work(struct work_struct *work); extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, - int len, - bool keep_towrite); + int len); extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end); extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a0f4d4197a0b..4c70cc525f10 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2016,7 +2016,6 @@ static int ext4_writepage(struct page *page, struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; struct ext4_io_submit io_submit; - bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { folio_invalidate(folio, 0, folio_size(folio)); @@ -2074,7 +2073,6 @@ static int ext4_writepage(struct page *page, unlock_page(page); return 0; } - keep_towrite = true; } if (PageChecked(page) && ext4_should_journal_data(inode)) @@ -2091,7 +2089,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite); + ret = ext4_bio_write_page(&io_submit, page, len); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -2125,7 +2123,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) len = size & ~PAGE_MASK; else len = PAGE_SIZE; - err = ext4_bio_write_page(&mpd->io_submit, page, len, false); + err = ext4_bio_write_page(&mpd->io_submit, page, len); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4e68ace86f11..4f9ecacd10aa 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -430,8 +430,7 @@ submit_and_retry: int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, - int len, - bool keep_towrite) + int len) { struct page *bounce_page = NULL; struct inode *inode = page->mapping->host; @@ -441,14 +440,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io, int nr_submitted = 0; int nr_to_submit = 0; struct writeback_control *wbc = io->io_wbc; + bool keep_towrite = false; BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - if (keep_towrite) - set_page_writeback_keepwrite(page); - else - set_page_writeback(page); ClearPageError(page); /* @@ -483,12 +479,17 @@ int ext4_bio_write_page(struct ext4_io_submit *io, if (!buffer_mapped(bh)) clear_buffer_dirty(bh); /* - * Keeping dirty some buffer we cannot write? Make - * sure to redirty the page. This happens e.g. when - * doing writeout for transaction commit. + * Keeping dirty some buffer we cannot write? Make sure + * to redirty the page and keep TOWRITE tag so that + * racing WB_SYNC_ALL writeback does not skip the page. + * This happens e.g. when doing writeout for + * transaction commit. */ - if (buffer_dirty(bh) && !PageDirty(page)) - redirty_page_for_writepage(wbc, page); + if (buffer_dirty(bh)) { + if (!PageDirty(page)) + redirty_page_for_writepage(wbc, page); + keep_towrite = true; + } if (io->io_bio) ext4_io_submit(io); continue; @@ -500,6 +501,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io, nr_to_submit++; } while ((bh = bh->b_this_page) != head); + /* Nothing to submit? Just unlock the page... */ + if (!nr_to_submit) + goto unlock; + bh = head = page_buffers(page); /* @@ -550,6 +555,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io, } } + if (keep_towrite) + set_page_writeback_keepwrite(page); + else + set_page_writeback(page); + /* Now submit buffers to write */ do { if (!buffer_async_write(bh)) @@ -558,11 +568,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, bounce_page ? bounce_page : page, bh); nr_submitted++; } while ((bh = bh->b_this_page) != head); - unlock: unlock_page(page); - /* Nothing submitted - we have to end page writeback */ - if (!nr_submitted) - end_page_writeback(page); return ret; } -- cgit v1.2.3 From 59205c8d4eaeeb85cf76eb1cb140283cf900412a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 7 Dec 2022 12:27:11 +0100 Subject: ext4: switch to using ext4_do_writepages() for ordered data writeout Use the standard writepages method (ext4_do_writepages()) to perform writeout of ordered data during journal commit. Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20221207112722.22220-8-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 + fs/ext4/inode.c | 16 ++++++++++++++++ fs/ext4/super.c | 3 +-- 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0540929630a1..140e1eb300d1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3000,6 +3000,7 @@ extern void ext4_set_inode_flags(struct inode *, bool init); extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3aa01a9d4664..56f6ef52fe76 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2960,6 +2960,22 @@ static int ext4_writepages(struct address_space *mapping, return ret; } +int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = jinode->i_dirty_start, + .range_end = jinode->i_dirty_end, + }; + struct mpage_da_data mpd = { + .inode = jinode->i_vfs_inode, + .wbc = &wbc, + .can_map = 0, + }; + return ext4_do_writepages(&mpd); +} + static int ext4_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 28d009151d23..72ead3b56706 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -540,8 +540,7 @@ static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) if (ext4_should_journal_data(jinode->i_vfs_inode)) ret = ext4_journalled_submit_inode_data_buffers(jinode); else - ret = jbd2_journal_submit_inode_data_buffers(jinode); - + ret = ext4_normal_submit_inode_data_buffers(jinode); return ret; } -- cgit v1.2.3