From fbe104942d3ff44f6802e8e4a3fbf267c1fb9ac4 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 9 Jul 2012 16:29:29 -0400 Subject: ext4: split ext4_file_write into buffered IO and direct IO ext4_file_dio_write is defined in order to split buffered IO and direct IO in ext4. This patch just refactor some stuff in write path. CC: Tao Ma CC: Eric Sandeen CC: Robin Dong Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/file.c | 60 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 22 deletions(-) (limited to 'fs/ext4/file.c') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8c7642a00054..a10dc7742aec 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -90,34 +90,16 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, } static ssize_t -ext4_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; int unaligned_aio = 0; ssize_t ret; - /* - * If we have encountered a bitmap-format file, the size limit - * is smaller than s_maxbytes, which is for extent-mapped files. - */ - - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - size_t length = iov_length(iov, nr_segs); - - if ((pos > sbi->s_bitmap_maxbytes || - (pos == sbi->s_bitmap_maxbytes && length > 0))) - return -EFBIG; - - if (pos + length > sbi->s_bitmap_maxbytes) { - nr_segs = iov_shorten((struct iovec *)iov, nr_segs, - sbi->s_bitmap_maxbytes - pos); - } - } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) && - !is_sync_kiocb(iocb))) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && + !is_sync_kiocb(iocb)) unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); - } /* Unaligned direct AIO must be serialized; see comment above */ if (unaligned_aio) { @@ -141,6 +123,40 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, return ret; } +static ssize_t +ext4_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + ssize_t ret; + + /* + * If we have encountered a bitmap-format file, the size limit + * is smaller than s_maxbytes, which is for extent-mapped files. + */ + + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + size_t length = iov_length(iov, nr_segs); + + if ((pos > sbi->s_bitmap_maxbytes || + (pos == sbi->s_bitmap_maxbytes && length > 0))) + return -EFBIG; + + if (pos + length > sbi->s_bitmap_maxbytes) { + nr_segs = iov_shorten((struct iovec *)iov, nr_segs, + sbi->s_bitmap_maxbytes - pos); + } + } + + if (unlikely(iocb->ki_filp->f_flags & O_DIRECT)) + ret = ext4_file_dio_write(iocb, iov, nr_segs, pos); + else + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + + return ret; +} + static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ext4_page_mkwrite, -- cgit v1.2.3 From 4bd809dbbf177ad0c450d702466b1da63e1b4b7e Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 22 Jul 2012 20:19:31 -0400 Subject: ext4: don't take the i_mutex lock when doing DIO overwrites Aligned and overwrite direct I/O can be parallelized. In ext4_file_dio_write, we first check whether these conditions are satisfied or not. If so, we take i_data_sem and release i_mutex lock directly. Meanwhile iocb->private is set to indicate that this is a dio overwrite, and it will be handled in ext4_ext_direct_IO. [ Added fix from Dan Carpenter to fix locking bug on the error path. ] CC: Tao Ma CC: Eric Sandeen CC: Robin Dong Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Signed-off-by: Dan Carpenter --- fs/ext4/file.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- fs/ext4/inode.c | 24 ++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 4 deletions(-) (limited to 'fs/ext4/file.c') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a10dc7742aec..1c81509f5bd9 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -93,9 +93,13 @@ static ssize_t ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct blk_plug plug; int unaligned_aio = 0; ssize_t ret; + int overwrite = 0; + size_t length = iov_length(iov, nr_segs); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb)) @@ -115,7 +119,50 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, ext4_aiodio_wait(inode); } - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + BUG_ON(iocb->ki_pos != pos); + + mutex_lock(&inode->i_mutex); + blk_start_plug(&plug); + + iocb->private = &overwrite; + + /* check whether we do a DIO overwrite or not */ + if (ext4_should_dioread_nolock(inode) && !unaligned_aio && + !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { + struct ext4_map_blocks map; + unsigned int blkbits = inode->i_blkbits; + int err, len; + + map.m_lblk = pos >> blkbits; + map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) + - map.m_lblk; + len = map.m_len; + + err = ext4_map_blocks(NULL, inode, &map, 0); + /* + * 'err==len' means that all of blocks has been preallocated no + * matter they are initialized or not. For excluding + * uninitialized extents, we need to check m_flags. There are + * two conditions that indicate for initialized extents. + * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned; + * 2) If we do a real lookup, non-flags are returned. + * So we should check these two conditions. + */ + if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) + overwrite = 1; + } + + ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + + if (ret > 0 || ret == -EIOCBQUEUED) { + ssize_t err; + + err = generic_write_sync(file, pos, ret); + if (err < 0 && ret > 0) + ret = err; + } + blk_finish_plug(&plug); if (unaligned_aio) mutex_unlock(ext4_aio_mutex(inode)); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 76cb3b1ad78a..bed574dd4c22 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2996,6 +2996,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE && final_size <= inode->i_size) { int overwrite = 0; + BUG_ON(iocb->private == NULL); + + /* If we do a overwrite dio, i_mutex locking can be released */ + overwrite = *((int *)iocb->private); + + if (overwrite) { + down_read(&EXT4_I(inode)->i_data_sem); + mutex_unlock(&inode->i_mutex); + } + /* * We could direct write to holes and fallocate. * @@ -3021,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, if (!is_sync_kiocb(iocb)) { ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) - return -ENOMEM; + if (!io_end) { + ret = -ENOMEM; + goto retake_lock; + } io_end->flag |= EXT4_IO_END_DIRECT; iocb->private = io_end; /* @@ -3083,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ret = err; ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } + + retake_lock: + /* take i_mutex locking again if we do a ovewrite dio */ + if (overwrite) { + up_read(&EXT4_I(inode)->i_data_sem); + mutex_lock(&inode->i_mutex); + } + return ret; } -- cgit v1.2.3 From 044ce47fec90ec0f25605e87a5d72cca14568bc3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 22 Jul 2012 20:31:31 -0400 Subject: ext4: convert last user of ext4_mark_super_dirty() to ext4_handle_dirty_super() The last user of ext4_mark_super_dirty() in ext4_file_open() is so rare it can well be modifying the superblock properly by journalling the change. Change it and get rid of ext4_mark_super_dirty() as it's not needed anymore. Artem: small amendments. Artem: tested using xfstests for both journalled and non-journalled ext4. Signed-off-by: Jan Kara Signed-off-by: Artem Bityutskiy Signed-off-by: "Theodore Ts'o" Tested-by: Artem Bityutskiy --- fs/ext4/ext4.h | 9 --------- fs/ext4/file.c | 14 +++++++++++++- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'fs/ext4/file.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e8e8afa402f1..c3411d4ce2da 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2328,15 +2328,6 @@ static inline void ext4_unlock_group(struct super_block *sb, spin_unlock(ext4_group_lock_ptr(sb, group)); } -static inline void ext4_mark_super_dirty(struct super_block *sb) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - - ext4_superblock_csum_set(sb, es); - if (EXT4_SB(sb)->s_journal == NULL) - sb->s_dirt =1; -} - /* * Block validity checking */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1c81509f5bd9..f77e795fed65 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -244,9 +244,21 @@ static int ext4_file_open(struct inode * inode, struct file * filp) path.dentry = mnt->mnt_root; cp = d_path(&path, buf, sizeof(buf)); if (!IS_ERR(cp)) { + handle_t *handle; + int err; + + handle = ext4_journal_start_sb(sb, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext4_journal_get_write_access(handle, sbi->s_sbh); + if (err) { + ext4_journal_stop(handle); + return err; + } strlcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); - ext4_mark_super_dirty(sb); + ext4_handle_dirty_super(handle, sb); + ext4_journal_stop(handle); } } /* -- cgit v1.2.3