diff options
Diffstat (limited to 'fs/ext4/file.c')
-rw-r--r-- | fs/ext4/file.c | 110 |
1 files changed, 61 insertions, 49 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d101b3b0c7da..c457c8517f0f 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -147,6 +147,17 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } +static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + struct inode *inode = file_inode(in); + + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + return filemap_splice_read(in, ppos, pipe, len, flags); +} + /* * Called when an inode is released. Note that this is different * from ext4_file_open: open gets called at every open, but release @@ -285,18 +296,13 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, if (ret <= 0) goto out; - current->backing_dev_info = inode_to_bdi(inode); ret = generic_perform_write(iocb, from); - current->backing_dev_info = NULL; out: inode_unlock(inode); - if (likely(ret > 0)) { - iocb->ki_pos += ret; - ret = generic_write_sync(iocb, ret); - } - - return ret; + if (unlikely(ret <= 0)) + return ret; + return generic_write_sync(iocb, ret); } static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, @@ -444,13 +450,14 @@ static const struct iomap_dio_ops ext4_dio_write_ops = { */ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, bool *ilock_shared, bool *extend, - bool *unwritten) + bool *unwritten, int *dio_flags) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); loff_t offset; size_t count; ssize_t ret; + bool overwrite, unaligned_io; restart: ret = ext4_generic_write_checks(iocb, from); @@ -459,16 +466,20 @@ restart: offset = iocb->ki_pos; count = ret; - if (ext4_extending_io(inode, offset, count)) - *extend = true; + + unaligned_io = ext4_unaligned_io(inode, from, offset); + *extend = ext4_extending_io(inode, offset, count); + overwrite = ext4_overwrite_io(inode, offset, count, unwritten); + /* - * Determine whether the IO operation will overwrite allocated - * and initialized blocks. - * We need exclusive i_rwsem for changing security info - * in file_modified(). + * Determine whether we need to upgrade to an exclusive lock. This is + * required to change security info in file_modified(), for extending + * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten + * extents (as partial block zeroing may be required). */ - if (*ilock_shared && (!IS_NOSEC(inode) || *extend || - !ext4_overwrite_io(inode, offset, count, unwritten))) { + if (*ilock_shared && + ((!IS_NOSEC(inode) || *extend || !overwrite || + (unaligned_io && *unwritten)))) { if (iocb->ki_flags & IOCB_NOWAIT) { ret = -EAGAIN; goto out; @@ -479,6 +490,32 @@ restart: goto restart; } + /* + * Now that locking is settled, determine dio flags and exclusivity + * requirements. Unaligned writes are allowed under shared lock so long + * as they are pure overwrites. Set the iomap overwrite only flag as an + * added precaution in this case. Even though this is unnecessary, we + * can detect and warn on unexpected -EAGAIN if an unsafe unaligned + * write is ever submitted. + * + * Otherwise, concurrent unaligned writes risk data corruption due to + * partial block zeroing in the dio layer, and so the I/O must occur + * exclusively. The inode lock is already held exclusive if the write is + * non-overwrite or extending, so drain all outstanding dio and set the + * force wait dio flag. + */ + if (*ilock_shared && unaligned_io) { + *dio_flags = IOMAP_DIO_OVERWRITE_ONLY; + } else if (!*ilock_shared && (unaligned_io || *extend)) { + if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; + } + if (unaligned_io && (!overwrite || *unwritten)) + inode_dio_wait(inode); + *dio_flags = IOMAP_DIO_FORCE_WAIT; + } + ret = file_modified(file); if (ret < 0) goto out; @@ -500,18 +537,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) loff_t offset = iocb->ki_pos; size_t count = iov_iter_count(from); const struct iomap_ops *iomap_ops = &ext4_iomap_ops; - bool extend = false, unaligned_io = false, unwritten = false; + bool extend = false, unwritten = false; bool ilock_shared = true; + int dio_flags = 0; /* - * We initially start with shared inode lock unless it is - * unaligned IO which needs exclusive lock anyways. - */ - if (ext4_unaligned_io(inode, from, offset)) { - unaligned_io = true; - ilock_shared = false; - } - /* * Quick check here without any i_rwsem lock to see if it is extending * IO. A more reliable check is done in ext4_dio_write_checks() with * proper locking in place. @@ -543,16 +573,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) return ext4_buffered_write_iter(iocb, from); } - ret = ext4_dio_write_checks(iocb, from, - &ilock_shared, &extend, &unwritten); + ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend, + &unwritten, &dio_flags); if (ret <= 0) return ret; - /* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */ - if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) { - ret = -EAGAIN; - goto out; - } /* * Make sure inline data cannot be created anymore since we are going * to allocate blocks for DIO. We know the inode does not have any @@ -563,19 +588,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) offset = iocb->ki_pos; count = ret; - /* - * Unaligned direct IO must be serialized among each other as zeroing - * of partial blocks of two competing unaligned IOs can result in data - * corruption. - * - * So we make sure we don't allow any unaligned IO in flight. - * For IOs where we need not wait (like unaligned non-AIO DIO), - * below inode_dio_wait() may anyway become a no-op, since we start - * with exclusive lock. - */ - if (unaligned_io) - inode_dio_wait(inode); - if (extend) { handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { @@ -595,8 +607,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ilock_shared && !unwritten) iomap_ops = &ext4_iomap_overwrite_ops; ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, - (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0, - NULL, 0); + dio_flags, NULL, 0); + WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)); if (ret == -ENOTBLK) ret = 0; @@ -957,7 +969,7 @@ const struct file_operations ext4_file_operations = { .release = ext4_release_file, .fsync = ext4_sync_file, .get_unmapped_area = thp_get_unmapped_area, - .splice_read = generic_file_splice_read, + .splice_read = ext4_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, }; |