diff options
author | Jan Kara <jack@suse.cz> | 2019-12-18 18:44:33 +0100 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2019-12-26 17:57:18 +0100 |
commit | 8cd115bdda17751ee2adab614a80df72228b3809 (patch) | |
tree | cf216cc2c9faab3d704e65595ce486225cf8edd0 | |
parent | ext4: export information about first/last errors via /sys/fs/ext4/<dev> (diff) | |
download | linux-8cd115bdda17751ee2adab614a80df72228b3809.tar.xz linux-8cd115bdda17751ee2adab614a80df72228b3809.zip |
ext4: Optimize ext4 DIO overwrites
Currently we start transaction for mapping every extent for writing
using direct IO. This is unnecessary when we know we are overwriting
already allocated blocks and the overhead of starting a transaction can
be significant especially for multithreaded workloads doing small writes.
Use iomap operations that avoid starting a transaction for direct IO
overwrites.
This improves throughput of 4k random writes - fio jobfile:
[global]
rw=randrw
norandommap=1
invalidate=0
bs=4k
numjobs=16
time_based=1
ramp_time=30
runtime=120
group_reporting=1
ioengine=psync
direct=1
size=16G
filename=file1.0.0:file1.0.1:file1.0.2:file1.0.3:file1.0.4:file1.0.5:file1.0.6:file1.0.7:file1.0.8:file1.0.9:file1.0.10:file1.0.11:file1.0.12:file1.0.13:file1.0.14:file1.0.15:file1.0.16:file1.0.17:file1.0.18:file1.0.19:file1.0.20:file1.0.21:file1.0.22:file1.0.23:file1.0.24:file1.0.25:file1.0.26:file1.0.27:file1.0.28:file1.0.29:file1.0.30:file1.0.31
file_service_type=random
nrfiles=32
from 3018MB/s to 4059MB/s in my test VM running test against simulated
pmem device (note that before iomap conversion, this workload was able
to achieve 3708MB/s because old direct IO path avoided transaction start
for overwrites as well). For dax, the win is even larger improving
throughput from 3042MB/s to 4311MB/s.
Reported-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20191218174433.19380-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
-rw-r--r-- | fs/ext4/file.c | 5 | ||||
-rw-r--r-- | fs/ext4/inode.c | 21 |
3 files changed, 26 insertions, 1 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5edc16d36a96..791e54425b00 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3455,6 +3455,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) } extern const struct iomap_ops ext4_iomap_ops; +extern const struct iomap_ops ext4_iomap_overwrite_ops; extern const struct iomap_ops ext4_iomap_report_ops; static inline int ext4_buffer_uptodate(struct buffer_head *bh) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9c2711bce0f9..5f225881176b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -447,6 +447,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); loff_t offset = iocb->ki_pos; size_t count = iov_iter_count(from); + const struct iomap_ops *iomap_ops = &ext4_iomap_ops; bool extend = false, unaligned_io = false; bool ilock_shared = true; @@ -526,7 +527,9 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ext4_journal_stop(handle); } - ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops, + if (ilock_shared) + iomap_ops = &ext4_iomap_overwrite_ops; + ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, is_sync_kiocb(iocb) || unaligned_io || extend); if (extend) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c3270aaa2b75..d035acab5b2a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3451,6 +3451,22 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, return 0; } +static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset, + loff_t length, unsigned flags, struct iomap *iomap, + struct iomap *srcmap) +{ + int ret; + + /* + * Even for writes we don't need to allocate blocks, so just pretend + * we are reading to save overhead of starting a transaction. + */ + flags &= ~IOMAP_WRITE; + ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap); + WARN_ON_ONCE(iomap->type != IOMAP_MAPPED); + return ret; +} + static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { @@ -3472,6 +3488,11 @@ const struct iomap_ops ext4_iomap_ops = { .iomap_end = ext4_iomap_end, }; +const struct iomap_ops ext4_iomap_overwrite_ops = { + .iomap_begin = ext4_iomap_overwrite_begin, + .iomap_end = ext4_iomap_end, +}; + static bool ext4_iomap_is_delalloc(struct inode *inode, struct ext4_map_blocks *map) { |