Diffstat (limited to 'fs/iomap')
-rw-r--r--  fs/iomap/buffered-io.c  110
-rw-r--r--  fs/iomap/direct-io.c     31
2 files changed, 76 insertions, 65 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d2a9f699e17e..2b82c7f1de88 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -44,20 +44,28 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio)
static struct bio_set iomap_ioend_bioset;
static struct iomap_page *
-iomap_page_create(struct inode *inode, struct folio *folio)
+iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags)
{
struct iomap_page *iop = to_iomap_page(folio);
unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+ gfp_t gfp;
if (iop || nr_blocks <= 1)
return iop;
+ if (flags & IOMAP_NOWAIT)
+ gfp = GFP_NOWAIT;
+ else
+ gfp = GFP_NOFS | __GFP_NOFAIL;
+
iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
- GFP_NOFS | __GFP_NOFAIL);
- spin_lock_init(&iop->uptodate_lock);
- if (folio_test_uptodate(folio))
- bitmap_fill(iop->uptodate, nr_blocks);
- folio_attach_private(folio, iop);
+ gfp);
+ if (iop) {
+ spin_lock_init(&iop->uptodate_lock);
+ if (folio_test_uptodate(folio))
+ bitmap_fill(iop->uptodate, nr_blocks);
+ folio_attach_private(folio, iop);
+ }
return iop;
}
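With this change a NULL return from iomap_page_create() becomes ambiguous: it can mean "no per-block state needed" (nr_blocks <= 1) or "GFP_NOWAIT allocation failed", which is why the caller in __iomap_write_begin() below rechecks nr_blocks before converting NULL into -EAGAIN. A standalone userspace model of that contract (names mirror the kernel code but are illustrative only, not kernel API):

#include <stdio.h>
#include <stdlib.h>

struct iop { unsigned long uptodate[1]; };

/* May return NULL either because no per-block state is needed
 * (nr_blocks <= 1) or because a nowait allocation failed. */
static struct iop *page_create(unsigned nr_blocks, int nowait)
{
	if (nr_blocks <= 1)
		return NULL;
	if (nowait)
		return calloc(1, sizeof(struct iop));	/* models GFP_NOWAIT: may be NULL */
	/* models GFP_NOFS | __GFP_NOFAIL: never returns NULL */
	for (;;) {
		struct iop *iop = calloc(1, sizeof(struct iop));
		if (iop)
			return iop;
	}
}

int main(void)
{
	unsigned nr_blocks = 4;
	struct iop *iop = page_create(nr_blocks, 1);

	/* The caller must disambiguate NULL, as __iomap_write_begin()
	 * does with its nr_blocks > 1 check. */
	if (!iop && nr_blocks > 1)
		puts("-EAGAIN: retry from a context that may block");
	free(iop);
	return 0;
}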
@@ -154,9 +162,6 @@ static void iomap_iop_set_range_uptodate(struct folio *folio,
static void iomap_set_range_uptodate(struct folio *folio,
struct iomap_page *iop, size_t off, size_t len)
{
- if (folio_test_error(folio))
- return;
-
if (iop)
iomap_iop_set_range_uptodate(folio, iop, off, len);
else
@@ -226,7 +231,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter,
if (WARN_ON_ONCE(size > iomap->length))
return -EIO;
if (offset > 0)
- iop = iomap_page_create(iter->inode, folio);
+ iop = iomap_page_create(iter->inode, folio, iter->flags);
else
iop = to_iomap_page(folio);
@@ -264,7 +269,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
return iomap_read_inline_data(iter, folio);
/* zero post-eof blocks as the page may be mapped */
- iop = iomap_page_create(iter->inode, folio);
+ iop = iomap_page_create(iter->inode, folio, iter->flags);
iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen);
if (plen == 0)
goto done;
@@ -492,31 +497,6 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
}
EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
-#ifdef CONFIG_MIGRATION
-int
-iomap_migrate_page(struct address_space *mapping, struct page *newpage,
- struct page *page, enum migrate_mode mode)
-{
- struct folio *folio = page_folio(page);
- struct folio *newfolio = page_folio(newpage);
- int ret;
-
- ret = folio_migrate_mapping(mapping, newfolio, folio, 0);
- if (ret != MIGRATEPAGE_SUCCESS)
- return ret;
-
- if (folio_test_private(folio))
- folio_attach_private(newfolio, folio_detach_private(folio));
-
- if (mode != MIGRATE_SYNC_NO_COPY)
- folio_migrate_copy(newfolio, folio);
- else
- folio_migrate_flags(newfolio, folio);
- return MIGRATEPAGE_SUCCESS;
-}
-EXPORT_SYMBOL_GPL(iomap_migrate_page);
-#endif /* CONFIG_MIGRATION */
-
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
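The open-coded migration helper above is removed because generic folio migration now covers it; filesystems that pointed at iomap_migrate_page() switch their address_space_operations to the generic helper instead. A sketch of the replacement wiring, mirroring what fs/xfs does upstream (treat the include and declaration details as approximate; example_aops is a placeholder name):

#include <linux/fs.h>
#include <linux/migrate.h>

static const struct address_space_operations example_aops = {
	/* filemap_migrate_folio() moves the mapping, the attached
	 * private data, and the folio contents, covering the steps
	 * iomap_migrate_page() open-coded. */
	.migrate_folio	= filemap_migrate_folio,
};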
@@ -547,10 +527,11 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
size_t len, struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- struct iomap_page *iop = iomap_page_create(iter->inode, folio);
+ struct iomap_page *iop;
loff_t block_size = i_blocksize(iter->inode);
loff_t block_start = round_down(pos, block_size);
loff_t block_end = round_up(pos + len, block_size);
+ unsigned int nr_blocks = i_blocks_per_folio(iter->inode, folio);
size_t from = offset_in_folio(folio, pos), to = from + len;
size_t poff, plen;
@@ -558,6 +539,10 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
return 0;
folio_clear_error(folio);
+ iop = iomap_page_create(iter->inode, folio, iter->flags);
+ if ((iter->flags & IOMAP_NOWAIT) && !iop && nr_blocks > 1)
+ return -EAGAIN;
+
do {
iomap_adjust_read_range(iter->inode, folio, &block_start,
block_end - block_start, &poff, &plen);
@@ -574,7 +559,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
return -EIO;
folio_zero_segments(folio, poff, from, to, poff + plen);
} else {
- int status = iomap_read_folio_sync(block_start, folio,
+ int status;
+
+ if (iter->flags & IOMAP_NOWAIT)
+ return -EAGAIN;
+
+ status = iomap_read_folio_sync(block_start, folio,
poff, plen, srcmap);
if (status)
return status;
@@ -603,6 +593,9 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
int status = 0;
+ if (iter->flags & IOMAP_NOWAIT)
+ fgp |= FGP_NOWAIT;
+
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length);
@@ -622,7 +615,7 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
fgp, mapping_gfp_mask(iter->inode->i_mapping));
if (!folio) {
- status = -ENOMEM;
+ status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
goto out_no_page;
}
if (pos + len > folio_pos(folio) + folio_size(folio))
@@ -740,6 +733,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
loff_t pos = iter->pos;
ssize_t written = 0;
long status = 0;
+ struct address_space *mapping = iter->inode->i_mapping;
+ unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
do {
struct folio *folio;
@@ -752,6 +747,11 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
bytes = min_t(unsigned long, PAGE_SIZE - offset,
iov_iter_count(i));
again:
+ status = balance_dirty_pages_ratelimited_flags(mapping,
+ bdp_flags);
+ if (unlikely(status))
+ break;
+
if (bytes > length)
bytes = length;
@@ -760,6 +760,10 @@ again:
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
+ *
+ * For async buffered writes the assumption is that the user
+ * page has already been faulted in. This can be optimized by
+ * faulting the user page.
*/
if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
status = -EFAULT;
@@ -771,7 +775,7 @@ again:
break;
page = folio_file_page(folio, pos >> PAGE_SHIFT);
- if (mapping_writably_mapped(iter->inode->i_mapping))
+ if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
copied = copy_page_from_iter_atomic(page, offset, bytes, i);
@@ -796,10 +800,12 @@ again:
pos += status;
written += status;
length -= status;
-
- balance_dirty_pages_ratelimited(iter->inode->i_mapping);
} while (iov_iter_count(i) && length);
+ if (status == -EAGAIN) {
+ iov_iter_revert(i, written);
+ return -EAGAIN;
+ }
return written ? written : status;
}
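When the nowait path bails out, iov_iter_revert() puts back everything consumed so the caller can retry the whole remainder from a blocking context. From userspace the same contract is visible through RWF_NOWAIT; a minimal demo (assumes a kernel and filesystem with async buffered write support, e.g. XFS on 5.20+, and a hypothetical "testfile"; older setups return -EOPNOTSUPP instead):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_WRONLY | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	char buf[4096];
	memset(buf, 'x', sizeof(buf));
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

	/* RWF_NOWAIT maps to IOCB_NOWAIT and then IOMAP_NOWAIT; if the
	 * write would block (folio allocation, read-in, or dirty
	 * throttling) the kernel returns -EAGAIN instead of sleeping. */
	ssize_t ret = pwritev2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (ret < 0 && errno == EAGAIN)
		puts("would block: retry without RWF_NOWAIT");
	else if (ret < 0)
		perror("pwritev2");
	else
		printf("wrote %zd bytes without blocking\n", ret);

	close(fd);
	return 0;
}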
@@ -815,6 +821,9 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
};
int ret;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ iter.flags |= IOMAP_NOWAIT;
+
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_write_iter(&iter, i);
if (iter.pos == iocb->ki_pos)
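The main consumer of this IOCB_NOWAIT-to-IOMAP_NOWAIT wiring is io_uring, which issues buffered writes nowait first and only punts to an io-wq worker when that returns -EAGAIN. A minimal liburing sketch of such a consumer (assumes liburing is installed and a hypothetical "testfile"; link with -luring):

#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	int fd = open("testfile", O_WRONLY | O_CREAT, 0644);
	if (fd < 0)
		return 1;

	static char buf[4096];
	memset(buf, 'x', sizeof(buf));

	/* A plain buffered write: the kernel tries it inline with
	 * IOCB_NOWAIT and falls back to a worker thread on -EAGAIN. */
	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
	io_uring_prep_write(sqe, fd, buf, sizeof(buf), 0);
	io_uring_submit(&ring);

	struct io_uring_cqe *cqe;
	io_uring_wait_cqe(&ring, &cqe);
	printf("buffered write result: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}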
@@ -917,10 +926,10 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
pos += bytes;
length -= bytes;
written += bytes;
- if (did_zero)
- *did_zero = true;
} while (length > 0);
+ if (did_zero)
+ *did_zero = true;
return written;
}
@@ -1329,7 +1338,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
struct writeback_control *wbc, struct inode *inode,
struct folio *folio, u64 end_pos)
{
- struct iomap_page *iop = iomap_page_create(inode, folio);
+ struct iomap_page *iop = iomap_page_create(inode, folio, 0);
struct iomap_ioend *ioend, *next;
unsigned len = i_blocksize(inode);
unsigned nblocks = i_blocks_per_folio(inode, folio);
@@ -1478,10 +1487,10 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
pgoff_t end_index = isize >> PAGE_SHIFT;
/*
- * Skip the page if it's fully outside i_size, e.g. due to a
- * truncate operation that's in progress. We must redirty the
- * page so that reclaim stops reclaiming it. Otherwise
- * iomap_release_folio() is called on it and gets confused.
+ * Skip the page if it's fully outside i_size, e.g.
+ * due to a truncate operation that's in progress. We've
+ * cleaned this page and truncate will finish things off for
+ * us.
*
* Note that the end_index is unsigned long. If the given
* offset is greater than 16TB on a 32-bit system then if we
@@ -1496,7 +1505,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
*/
if (folio->index > end_index ||
(folio->index == end_index && poff == 0))
- goto redirty;
+ goto unlock;
/*
* The page straddles i_size. It must be zeroed out on each
@@ -1514,6 +1523,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
redirty:
folio_redirty_for_writepage(wbc, folio);
+unlock:
folio_unlock(folio);
return 0;
}
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 370c3241618a..c75d33d5c3ce 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -52,7 +52,7 @@ struct iomap_dio {
};
static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
- struct iomap_dio *dio, unsigned short nr_vecs, unsigned int opf)
+ struct iomap_dio *dio, unsigned short nr_vecs, blk_opf_t opf)
{
if (dio->dops && dio->dops->bio_set)
return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf,
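The unsigned int to blk_opf_t switches in this file change no behavior: blk_opf_t is a sparse __bitwise type, so mixing request flags with plain integers now draws a sparse warning. Paraphrasing its declaration from include/linux/blk_types.h (the exact form may differ by kernel version):

/* sketch of the type behind the conversion */
typedef __u32 __bitwise blk_opf_t;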
@@ -212,10 +212,10 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
* mapping, and whether or not we want FUA. Note that we can end up
* clearing the WRITE_FUA flag in the dio request.
*/
-static inline unsigned int iomap_dio_bio_opflags(struct iomap_dio *dio,
+static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
const struct iomap *iomap, bool use_fua)
{
- unsigned int opflags = REQ_SYNC | REQ_IDLE;
+ blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
if (!(dio->flags & IOMAP_DIO_WRITE)) {
WARN_ON_ONCE(iomap->flags & IOMAP_F_ZONE_APPEND);
@@ -242,10 +242,9 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
struct inode *inode = iter->inode;
unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
unsigned int fs_block_size = i_blocksize(inode), pad;
- unsigned int align = iov_iter_alignment(dio->submit.iter);
loff_t length = iomap_length(iter);
loff_t pos = iter->pos;
- unsigned int bio_opf;
+ blk_opf_t bio_opf;
struct bio *bio;
bool need_zeroout = false;
bool use_fua = false;
@@ -253,7 +252,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
size_t copied = 0;
size_t orig_count;
- if ((pos | length | align) & ((1 << blkbits) - 1))
+ if ((pos | length) & ((1 << blkbits) - 1) ||
+ !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
return -EINVAL;
if (iomap->type == IOMAP_UNWRITTEN) {
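The old code folded iov_iter_alignment() into the same block-size mask as pos and length; bdev_iter_is_aligned() instead validates the user memory against the device's reported dma_alignment, so sub-block user buffer alignment can be accepted on capable devices. The remaining mask test relies on block sizes being powers of two; a standalone illustration of the trick:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* (pos | length) & ((1 << blkbits) - 1): OR-ing the values first
 * means a single mask test catches a misalignment in either one. */
static bool block_aligned(uint64_t pos, uint64_t length, unsigned int blkbits)
{
	return ((pos | length) & ((1ull << blkbits) - 1)) == 0;
}

int main(void)
{
	printf("%d\n", block_aligned(4096, 8192, 9));	/* 1: both multiples of 512 */
	printf("%d\n", block_aligned(4096, 100, 9));	/* 0: length misaligned */
	printf("%d\n", block_aligned(100, 512, 9));	/* 0: pos misaligned */
	return 0;
}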
@@ -548,17 +548,18 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
}
/* for data sync or sync, we need sync completion processing */
- if (iocb->ki_flags & IOCB_DSYNC)
+ if (iocb_is_dsync(iocb) && !(dio_flags & IOMAP_DIO_NOSYNC)) {
dio->flags |= IOMAP_DIO_NEED_SYNC;
- /*
- * For datasync only writes, we optimistically try using FUA for
- * this IO. Any non-FUA write that occurs will clear this flag,
- * hence we know before completion whether a cache flush is
- * necessary.
- */
- if ((iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) == IOCB_DSYNC)
- dio->flags |= IOMAP_DIO_WRITE_FUA;
+ /*
+ * For datasync only writes, we optimistically try
+ * using FUA for this IO. Any non-FUA write that
+ * occurs will clear this flag, hence we know before
+ * completion whether a cache flush is necessary.
+ */
+ if (!(iocb->ki_flags & IOCB_SYNC))
+ dio->flags |= IOMAP_DIO_WRITE_FUA;
+ }
}
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
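The restructured block keeps the FUA heuristic intact while adding two guards: iocb_is_dsync() centralizes the O_DSYNC check, and IOMAP_DIO_NOSYNC lets callers that do their own sync processing opt out of IOMAP_DIO_NEED_SYNC entirely. From userspace the datasync-only case is just a matter of open flags; a small demo (assumes a device with a 4096-byte direct I/O alignment constraint and a hypothetical "testfile"):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* O_DSYNC without O_SYNC asks for data integrity only, so the
	 * kernel may satisfy it with a single FUA write and skip the
	 * post-I/O cache flush when the device supports FUA and no
	 * metadata needs syncing. */
	int fd = open("testfile", O_WRONLY | O_CREAT | O_DIRECT | O_DSYNC, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	void *buf;
	if (posix_memalign(&buf, 4096, 4096))
		return 1;
	memset(buf, 'x', 4096);

	if (pwrite(fd, buf, 4096, 0) < 0)
		perror("pwrite");

	free(buf);
	close(fd);
	return 0;
}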