summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/file.c86
1 files changed, 46 insertions, 40 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d101b3b0c7da..b18a63e8dbbc 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -444,13 +444,14 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
*/
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
bool *ilock_shared, bool *extend,
- bool *unwritten)
+ bool *unwritten, int *dio_flags)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
loff_t offset;
size_t count;
ssize_t ret;
+ bool overwrite, unaligned_io;
restart:
ret = ext4_generic_write_checks(iocb, from);
@@ -459,16 +460,20 @@ restart:
offset = iocb->ki_pos;
count = ret;
- if (ext4_extending_io(inode, offset, count))
- *extend = true;
+
+ unaligned_io = ext4_unaligned_io(inode, from, offset);
+ *extend = ext4_extending_io(inode, offset, count);
+ overwrite = ext4_overwrite_io(inode, offset, count, unwritten);
+
/*
- * Determine whether the IO operation will overwrite allocated
- * and initialized blocks.
- * We need exclusive i_rwsem for changing security info
- * in file_modified().
+ * Determine whether we need to upgrade to an exclusive lock. This is
+ * required to change security info in file_modified(), for extending
+ * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
+ * extents (as partial block zeroing may be required).
*/
- if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
- !ext4_overwrite_io(inode, offset, count, unwritten))) {
+ if (*ilock_shared &&
+ ((!IS_NOSEC(inode) || *extend || !overwrite ||
+ (unaligned_io && *unwritten)))) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
@@ -479,6 +484,32 @@ restart:
goto restart;
}
+ /*
+ * Now that locking is settled, determine dio flags and exclusivity
+ * requirements. Unaligned writes are allowed under shared lock so long
+ * as they are pure overwrites. Set the iomap overwrite only flag as an
+ * added precaution in this case. Even though this is unnecessary, we
+ * can detect and warn on unexpected -EAGAIN if an unsafe unaligned
+ * write is ever submitted.
+ *
+ * Otherwise, concurrent unaligned writes risk data corruption due to
+ * partial block zeroing in the dio layer, and so the I/O must occur
+ * exclusively. The inode lock is already held exclusive if the write is
+ * non-overwrite or extending, so drain all outstanding dio and set the
+ * force wait dio flag.
+ */
+ if (*ilock_shared && unaligned_io) {
+ *dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
+ } else if (!*ilock_shared && (unaligned_io || *extend)) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (unaligned_io && (!overwrite || *unwritten))
+ inode_dio_wait(inode);
+ *dio_flags = IOMAP_DIO_FORCE_WAIT;
+ }
+
ret = file_modified(file);
if (ret < 0)
goto out;
@@ -500,18 +531,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset = iocb->ki_pos;
size_t count = iov_iter_count(from);
const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
- bool extend = false, unaligned_io = false, unwritten = false;
+ bool extend = false, unwritten = false;
bool ilock_shared = true;
+ int dio_flags = 0;
/*
- * We initially start with shared inode lock unless it is
- * unaligned IO which needs exclusive lock anyways.
- */
- if (ext4_unaligned_io(inode, from, offset)) {
- unaligned_io = true;
- ilock_shared = false;
- }
- /*
* Quick check here without any i_rwsem lock to see if it is extending
* IO. A more reliable check is done in ext4_dio_write_checks() with
* proper locking in place.
@@ -543,16 +567,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_buffered_write_iter(iocb, from);
}
- ret = ext4_dio_write_checks(iocb, from,
- &ilock_shared, &extend, &unwritten);
+ ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend,
+ &unwritten, &dio_flags);
if (ret <= 0)
return ret;
- /* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
- if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
- ret = -EAGAIN;
- goto out;
- }
/*
* Make sure inline data cannot be created anymore since we are going
* to allocate blocks for DIO. We know the inode does not have any
@@ -563,19 +582,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
offset = iocb->ki_pos;
count = ret;
- /*
- * Unaligned direct IO must be serialized among each other as zeroing
- * of partial blocks of two competing unaligned IOs can result in data
- * corruption.
- *
- * So we make sure we don't allow any unaligned IO in flight.
- * For IOs where we need not wait (like unaligned non-AIO DIO),
- * below inode_dio_wait() may anyway become a no-op, since we start
- * with exclusive lock.
- */
- if (unaligned_io)
- inode_dio_wait(inode);
-
if (extend) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
@@ -595,8 +601,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ilock_shared && !unwritten)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
- (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
- NULL, 0);
+ dio_flags, NULL, 0);
+ WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
if (ret == -ENOTBLK)
ret = 0;